-
Notifications
You must be signed in to change notification settings - Fork 1
/
itemembd.py
118 lines (90 loc) · 3.81 KB
/
itemembd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
import os
import logging
import tensorflow as tf
from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
from tensorflow.python.estimator.export.export_output import PredictOutput
# Key under which model_fn registers its PredictOutput in `export_outputs`.
SIGNATURE_NAME = "serving_default"
# Default learning rate; stored in the module-level `params` dict.
LEARNING_RATE = 0.001
# Keys of the `features` dict holding the user-id and item-id inputs.
USER_EMBEDDING_TENSOR_NAME = "user_embedding"
ITEM_EMBEDDING_TENSOR_NAME = "item_embedding"
def model_fn(features, labels, mode, params):
    """Build the EstimatorSpec for a dot-product user/item embedding model.

    Every user id and item id is mapped to a 30-dimensional embedding and the
    score of a (user, item) pair is the dot product of the two vectors.

    Args:
        features: dict with id tensors stored under
            USER_EMBEDDING_TENSOR_NAME and ITEM_EMBEDDING_TENSOR_NAME.
        labels: target scores for TRAIN/EVAL; not read in PREDICT mode.
        mode: a `tf.estimator.ModeKeys` value.
        params: optional dict of hyperparameters. "learning_rate" is read
            from it and defaults to LEARNING_RATE when absent.

    Returns:
        A `tf.estimator.EstimatorSpec`. In PREDICT mode its "products"
        prediction holds, per queried item, the indices of the 100 rows of
        the item embedding table closest to it in Euclidean distance
        (nearest first; the queried item itself is normally the first hit).
        In TRAIN/EVAL mode it carries the MSE loss, the Adam train op and an
        "mse" evaluation metric.
    """
    embedding_size = 30
    user_ids = features[USER_EMBEDDING_TENSOR_NAME]
    item_ids = features[ITEM_EMBEDDING_TENSOR_NAME]

    # 1. Embed the ids. The layer objects are kept so that the full item
    #    table can be reached in PREDICT mode.
    user_layer = tf.keras.layers.Embedding(
        output_dim=embedding_size,
        input_dim=57018,
        input_length=1,
        name='user',
    )
    item_layer = tf.keras.layers.Embedding(
        output_dim=embedding_size,
        input_dim=296556,
        input_length=1,
        name='item',
    )
    user_vecs = tf.keras.layers.Reshape([embedding_size])(user_layer(user_ids))
    item_vecs = tf.keras.layers.Reshape([embedding_size])(item_layer(item_ids))

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Compare each queried item vector against the whole embedding table,
        # not against the other vectors of the current batch.
        item_table = item_layer.embeddings
        # Squared Euclidean distance via ||a - b||^2 = ||a||^2 - 2ab + ||b||^2;
        # the ranking is the same as for the plain distance.
        query_sq = tf.expand_dims(
            tf.reduce_sum(tf.square(item_vecs), axis=1), 1)
        table_sq = tf.expand_dims(
            tf.reduce_sum(tf.square(item_table), axis=1), 0)
        cross = tf.matmul(item_vecs, item_table, transpose_b=True)
        sq_distance = query_sq - 2.0 * cross + table_sq
        # top_k selects the largest values, so negate to get the nearest rows.
        nearest = tf.nn.top_k(-sq_distance, k=100, sorted=True)
        products = nearest.indices
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"products": products},
            export_outputs={
                SIGNATURE_NAME: PredictOutput({"products": products})
            },
        )

    # Predicted score per (user, item) pair, flattened to one value per row.
    y = tf.keras.layers.Dot(1, normalize=False)([user_vecs, item_vecs])
    predictions = tf.reshape(y, [-1])

    # 2. Define the loss function for training/evaluation.
    loss = tf.losses.mean_squared_error(labels, predictions)

    # 3. Define the training operation/optimizer.
    learning_rate = (params or {}).get("learning_rate", LEARNING_RATE)
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=learning_rate,
        optimizer="Adam",
    )

    # 4. Mean squared error as an additional evaluation metric.
    eval_metric_ops = {
        "mse": tf.metrics.mean_squared_error(
            tf.cast(labels, tf.float32), predictions)
    }

    # Provide an estimator spec for `ModeKeys.EVAL` and `ModeKeys.TRAIN` modes.
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops
    )
def serving_input_fn(params):
    """Return the raw serving input receiver for one (user, item) id pair.

    Two int64 placeholders of shape [1] are registered under the user and
    item feature keys. `params` is not used.
    """
    receiver_tensors = {
        USER_EMBEDDING_TENSOR_NAME: tf.placeholder(tf.int64, shape=[1]),
        ITEM_EMBEDDING_TENSOR_NAME: tf.placeholder(tf.int64, shape=[1]),
    }
    make_receiver = build_raw_serving_input_receiver_fn(receiver_tensors)
    return make_receiver()
# Module-level hyperparameter dict carrying the default learning rate.
params = {"learning_rate": LEARNING_RATE}
def train_input_fn(training_dir, params):
    """Return the input pipeline built from ``train.csv`` in `training_dir`.

    `params` is not used.
    """
    train_file = 'train.csv'
    return _input_fn(training_dir, train_file)
def eval_input_fn(training_dir, params):
    """Return the input pipeline built from ``test.csv`` in `training_dir`.

    `params` is not used.
    """
    eval_file = 'test.csv'
    return _input_fn(training_dir, eval_file)
def _input_fn(training_dir, training_filename):
    """Load a headerless integer CSV and return shuffled training batches.

    Args:
        training_dir: directory containing the CSV file.
        training_filename: name of the CSV file inside `training_dir`.

    Returns:
        The result of calling the input function built by
        `tf.estimator.inputs.numpy_input_fn`: features keyed by the user and
        item feature names plus the matching labels, in shuffled batches of
        64 that repeat indefinitely (`num_epochs=None`).
    """
    # np.int64 replaces the deprecated `np.int` alias (removed in NumPy 1.24)
    # and matches the int64 placeholders declared by serving_input_fn.
    training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=os.path.join(training_dir, training_filename),
        target_dtype=np.int64,
        features_dtype=np.int64,
    )
    # NOTE(review): load_csv_without_header moves the last CSV column into
    # `.target`, so reading columns 0-2 of `.data` presumably requires at
    # least four columns in the file (user, item, score, extra) - verify
    # against the actual data.
    return tf.estimator.inputs.numpy_input_fn(
        x={
            USER_EMBEDDING_TENSOR_NAME: np.array(training_set.data[:, 0]),
            ITEM_EMBEDDING_TENSOR_NAME: np.array(training_set.data[:, 1]),
        },
        y=np.array(training_set.data[:, 2]),
        shuffle=True,
        batch_size=64,
        num_epochs=None,
    )()