class CIN(tf.keras.layers.Layer):
  """Compressed Interaction Network (CIN) module of the xDeepFM model.

  CIN builds explicit high-order feature interactions at the vector-wise
  level (whole embedding vectors interact) rather than the bit-wise level.
  Every CIN layer combines the raw field embeddings with the previous layer's
  feature maps and compresses the pairwise products into a configurable
  number of new feature maps.

  Reference:
    [xDeepFM](https://arxiv.org/pdf/1803.05170)
    xDeepFM: Combining Explicit and Implicit Feature Interactions for
    Recommender Systems
  """

  def __init__(self, params, name='cin', reuse=None, **kwargs):
    """Reads the layer hyper-parameters from `params`.

    Args:
      params: parameter object exposing `get_or_default(key, default)`; the
        keys read are `hidden_feature_sizes` (number of feature maps of each
        CIN layer), `kernel_regularizer` and `bias_regularizer`.
      name: layer name, also used as the variable scope name in `build`.
      reuse: accepted for signature compatibility; not used by this layer.
      **kwargs: forwarded to `tf.keras.layers.Layer`.

    Raises:
      ValueError: if `hidden_feature_sizes` is empty.
    """
    super(CIN, self).__init__(name=name, **kwargs)
    self._name = name
    self._hidden_feature_sizes = list(
        params.get_or_default('hidden_feature_sizes', []))

    # Raise explicitly instead of asserting: asserts are stripped under -O,
    # which would let an empty configuration slip through to `call`.
    if not self._hidden_feature_sizes:
      raise ValueError(
          'parameter hidden_feature_sizes must be a list of int with length greater than 0'
      )

    kernel_regularizer = params.get_or_default('kernel_regularizer', None)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    bias_regularizer = params.get_or_default('bias_regularizer', None)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)

  def build(self, input_shape):
    """Creates one kernel and one bias variable per CIN layer.

    Args:
      input_shape: shape of the 3D input, (batch, num_fields, embedding_dim).

    Raises:
      ValueError: if the input is not 3-dimensional, or if the number of
        fields (dimension 1) is not statically known.
    """
    # Normalize to a plain list of ints / None so the same code works whether
    # Keras hands over a TensorShape, a tuple or a list.
    shape = tf.TensorShape(input_shape).as_list()
    if len(shape) != 3:
      raise ValueError(
          'Unexpected inputs dimensions %d, expect to be 3 dimensions' %
          (len(shape)))

    num_fields = shape[1]
    if num_fields is None:
      # The kernel shapes depend on the field count, so it must be static.
      raise ValueError(
          'The number of fields (dimension 1) of the CIN input must be '
          'statically known')

    # layer_sizes[0] is the number of input fields (h0); layer_sizes[i + 1] is
    # the number of feature maps produced by the i-th CIN layer.
    layer_sizes = [num_fields] + list(self._hidden_feature_sizes)

    # Compare the numeric major version: a plain string comparison such as
    # `tf.__version__ >= '2.0'` is lexicographic and misorders e.g. '10.0'.
    tf_major_version = int(tf.__version__.split('.')[0])
    tfv1 = tf.compat.v1 if tf_major_version >= 2 else tf

    num_layers = len(self._hidden_feature_sizes)
    with tfv1.variable_scope(self._name):
      # Kernel i has shape (h_{i+1}, h_i, h_0): one weight per
      # (output map, previous map, input field) triple.
      self.kernel_list = [
          tfv1.get_variable(
              name='cin_kernel_%d' % i,
              shape=[layer_sizes[i + 1], layer_sizes[i], layer_sizes[0]],
              initializer=tf.initializers.he_normal(),
              regularizer=self._kernel_regularizer,
              trainable=True) for i in range(num_layers)
      ]
      self.bias_list = [
          tfv1.get_variable(
              name='cin_bias_%d' % i,
              shape=[layer_sizes[i + 1]],
              initializer=tf.keras.initializers.Zeros,
              regularizer=self._bias_regularizer,
              trainable=True) for i in range(num_layers)
      ]

    super(CIN, self).build(input_shape)

  def call(self, input, **kwargs):
    """Computes the compressed feature maps.

    Args:
      input: The 3D input tensor with shape (b, h0, d), where b is batch_size,
        h0 is the number of features, d is the feature embedding dimension.

    Returns:
      2D tensor of compressed feature map with shape (b, featuremap_num),
      where b is the batch_size, featuremap_num is sum of the hidden layer sizes
    """
    x_0_expanded = tf.expand_dims(input, 1)  # (b, 1, h0, d)
    x_i = input  # (b, h_i, d); starts as the raw field embeddings
    pooled_feature_map_list = []
    for kernel, bias in zip(self.kernel_list, self.bias_list):
      # Pairwise (Hadamard) products between the previous layer's maps and
      # the raw fields: (b, h_i, h0, d).
      pairwise = tf.multiply(x_0_expanded, tf.expand_dims(x_i, 2))

      # Weight every pair for every output map. Broadcasting expands
      # (b, 1, h_i, h0, d) * (1, hk, h_i, h0, 1) to (b, hk, h_i, h0, d), so
      # the operand does not need to be tiled hk times beforehand.
      weighted = tf.multiply(
          tf.expand_dims(pairwise, 1),
          tf.expand_dims(tf.expand_dims(kernel, -1), 0))

      # Compress over the (h_i, h0) pair axes -> (b, hk, d).
      feature_map = tf.reduce_sum(weighted, axis=[2, 3])
      feature_map = tf.add(
          feature_map, tf.expand_dims(tf.expand_dims(bias, axis=-1), axis=0))
      feature_map = tf.nn.relu(feature_map)

      x_i = feature_map
      # Sum-pool each feature map over the embedding dimension -> (b, hk).
      pooled_feature_map_list.append(tf.reduce_sum(feature_map, axis=-1))
    return tf.concat(
        pooled_feature_map_list, axis=-1)  # shape = (b, h1 + ... + hk)

  def get_config(self):
    """Returns the layer configuration as a dict.

    The dict merges the base layer config with the CIN hyper-parameters.
    Note that `__init__` takes a `params` object rather than these keyword
    arguments, so the dict describes the layer but is not directly accepted
    by the constructor.
    """
    config = {
        'hidden_feature_sizes':
            list(self._hidden_feature_sizes),
        'kernel_regularizer':
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        'bias_regularizer':
            tf.keras.regularizers.serialize(self._bias_regularizer),
    }
    base_config = super(CIN, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
"experiments/parallel_dssm_taobao_ckpt" + +train_config { + log_step_count_steps: 200 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + # initial_learning_rate: 0.001 + initial_learning_rate: 0.0001 + decay_steps: 4000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 4000 + sync_replicas: false + num_steps: 100000 +} + +eval_config { + + metrics_set: { + recall_at_topk { + topk: 50 + } + } + metrics_set: { + recall_at_topk { + topk: 10 + } + } + metrics_set: { + recall_at_topk { + topk: 5 + } + } + metrics_set: { + recall_at_topk { + topk: 1 + } + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING 
+ } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput + + negative_sampler { + input_path: 'data/test/tb_data/taobao_ad_feature_gl' + num_sample: 1024 + num_eval_sample: 2048 + attr_fields: 'adgroup_id' + attr_fields: 'cate_id' + attr_fields: 'campaign_id' + attr_fields: 'customer' + attr_fields: 'brand' + item_id_field: 'adgroup_id' + } +} + +feature_config: { + features: { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 + } + features: { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + 
features: { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + } + features: { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + } + features: { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 + } +} +model_config:{ + model_name: "Parallel_DSSM" + model_class: 'MatchModel' + feature_groups: { + group_name: 'user_mlp_feature' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + feature_names: 'final_gender_code' + wide_deep:DEEP + } + feature_groups: { + group_name: 'user_dcn_feature' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + feature_names: 'final_gender_code' + wide_deep:DEEP + } + feature_groups: { + group_name: 'user_fm_feature' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + feature_names: 'final_gender_code' + 
wide_deep:DEEP + } + feature_groups: { + group_name: 'user_cin_feature' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + feature_names: 'final_gender_code' + wide_deep:DEEP + } + + feature_groups: { + group_name: "item_mlp_feature" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + #feature_names: 'price' + #feature_names: 'pid' + wide_deep:DEEP + } + feature_groups: { + group_name: "item_dcn_feature" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + #feature_names: 'price' + #feature_names: 'pid' + wide_deep:DEEP + } + feature_groups: { + group_name: "item_fm_feature" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + #feature_names: 'price' + #feature_names: 'pid' + wide_deep:DEEP + } + feature_groups: { + group_name: "item_cin_feature" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + #feature_names: 'price' + #feature_names: 'pid' + wide_deep:DEEP + } + + backbone { + blocks { + name: 'user_mlp' + inputs { + feature_group_name: 'user_mlp_feature' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [128, 32] + } + } + } + blocks { + name: 'user_dcn' + inputs { + feature_group_name: 'user_dcn_feature' + input_fn: 'lambda x: [x, x]' + } + recurrent { + num_steps: 3 + fixed_input_index: 0 + keras_layer { + class_name: 'Cross' + } + } + } + blocks { + name: 'user_dcn_out' + inputs { + block_name: 'user_dcn' + } + 
keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [32] + } + } + } + + blocks { + name: 'user_fm_feature' + inputs { + feature_group_name: 'user_fm_feature' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'user_fm' + inputs { + block_name: 'user_fm_feature' + input_slice: '[1]' + } + keras_layer { + class_name: 'FM' + } + } + + blocks { + name: 'user_cin_feature' + inputs { + feature_group_name: 'user_cin_feature' + } + input_layer { + only_output_3d_tensor: true + } + } + + blocks { + name: 'user_cin' + inputs { + block_name: 'user_cin_feature' + } + keras_layer { + class_name: 'CIN' + cin { + hidden_feature_sizes: [16, 16, 16] + } + } + } + + blocks { + name: 'item_mlp' + inputs { + feature_group_name: 'item_mlp_feature' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [128, 32] + } + } + } + blocks { + name: 'item_dcn' + inputs { + feature_group_name: 'item_dcn_feature' + input_fn: 'lambda x: [x, x]' + } + recurrent { + num_steps: 3 + fixed_input_index: 0 + keras_layer { + class_name: 'Cross' + } + } + } + blocks { + name: 'item_dcn_out' + inputs { + block_name: 'item_dcn' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [32] + } + } + } + + blocks { + name: 'item_fm_feature' + inputs { + feature_group_name: 'item_fm_feature' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'item_fm' + inputs { + block_name: 'item_fm_feature' + input_slice: '[1]' + } + keras_layer { + class_name: 'FM' + } + } + blocks { + name: 'item_cin_feature' + inputs { + feature_group_name: 'item_cin_feature' + } + input_layer { + only_output_3d_tensor: true + } + } + blocks { + name: 'item_cin' + inputs { + block_name: 'item_cin_feature' + } + keras_layer { + class_name: 'CIN' + cin { + hidden_feature_sizes: [16, 16, 16] + } + } + } + + blocks { + name: 'user_tower_embedding' + inputs { + block_name: 'user_mlp' + } + inputs { + block_name: 'user_dcn_out' + } + inputs { + 
block_name: 'user_fm' + } + inputs { + block_name: 'user_cin' + } + + merge_inputs_into_list: true + lambda { + expression: 'lambda x: tf.concat(x, axis=1)' + } + } + blocks { + name: 'item_tower_embedding' + inputs { + block_name: 'item_mlp' + } + inputs { + block_name: 'item_dcn_out' + } + inputs { + block_name: 'item_fm' + } + inputs { + block_name: 'item_cin' + } + merge_inputs_into_list: true + lambda { + expression: 'lambda x: tf.concat(x, axis=1)' + } + } + + output_blocks: ['user_tower_embedding', 'item_tower_embedding'] + } + model_params { + l2_regularization: 1e-4 + user_tower_idx_in_output: 0 + item_tower_idx_in_output: 1 + scale_simi: false + simi_func: INNER_PRODUCT + } + loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} + +export_config { +}