Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify document detail #517

Closed
wants to merge 14 commits into from
74 changes: 74 additions & 0 deletions docker/Dockerfile_tf112
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Alternative base image (disabled):
#FROM tensorflow/tensorflow:1.12.0
FROM my_tensorflow_base:1.12.0-py2

# Use the apt source list shipped in the repository.
COPY docker/sources_18.04.list /etc/apt/sources.list

# The md5sum call is kept from the original recipe, whose author notes it is
# necessary for later commands to take effect.
# apt-get update and install share one layer so a cached package index is never
# reused with a changed package list; the index is removed in the same layer
# because later apt steps refresh it themselves.
RUN md5sum /etc/apt/sources.list \
    && apt-get update \
    && apt-get install -y \
        apt-utils \
        build-essential \
        curl \
        cython \
        g++ \
        gcc \
        inetutils-ping \
        libpq-dev \
        libsasl2-dev \
        openjdk-8-jdk \
        strace \
        telnet \
        unzip \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && pip install cython \
    && pip install setuptools_scm

# Check that Cython was installed successfully and print the pip version.
RUN python -c "import Cython" \
    && pip --version

# Stage the EasyRec sources under /EasyRec for the install steps further down.
RUN mkdir /EasyRec
# Top-level packaging files, copied together.
COPY MANIFEST.in README.md requirements.txt setup.cfg setup.py /EasyRec/
# Directory trees keep their own destination directories.
COPY requirements /EasyRec/requirements
COPY scripts /EasyRec/scripts
COPY easy_rec /EasyRec/easy_rec/

# Install the odpscmd command line client under /usr/local/odps_clt and expose
# it on PATH through a symlink.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the zip; download, unpack and cleanup share one layer so the archive
# does not stay in the image.
RUN curl --fail --location "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip \
    && mkdir /usr/local/odps_clt/ \
    && cd /usr/local/odps_clt/ \
    && unzip /EasyRec/odpscmd_public.zip \
    && rm /EasyRec/odpscmd_public.zip \
    && ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd

# Extra Python packages, installed from the Aliyun PyPI mirror.
# The index and trusted-host options are passed once (they were duplicated).
RUN pip install pystack-debugger idna kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# Upgrade pip and the packaging tools.
RUN pip install --upgrade pip setuptools wheel

# Install setuptools-rust and the TensorFlow Probability release used with
# TensorFlow 1.12.
RUN pip install setuptools-rust
RUN pip install tensorflow_probability==0.5.0

# Base toolchain and development headers. This is the union of the package
# lists from the original three apt-get steps, installed once; update and
# install share a layer and the package index is removed afterwards.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        cargo \
        cmake \
        curl \
        libffi-dev \
        libssl-dev \
        python-dev \
        rustc \
    && rm -rf /var/lib/apt/lists/*

# Run the rustup installer. The script is downloaded to a file first so that a
# failed download aborts the build instead of piping empty input into sh.
# Sourcing $HOME/.cargo/env is omitted: it only changes the environment of the
# shell of that single RUN and has no effect on later instructions.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y \
    && rm /tmp/rustup-init.sh

# Install cryptography.
RUN pip install cryptography

# Configure a China-hosted mirror (USTC) for the crates.io registry.
# printf is used so the file content does not depend on how the shell's echo
# treats backslash escapes.
RUN mkdir -p "$HOME/.cargo" \
    && printf '%s\n' \
        '[source.crates-io]' \
        'replace-with = "ustc"' \
        '[source.ustc]' \
        'registry = "https://mirrors.ustc.edu.cn/crates.io-index"' \
        > "$HOME/.cargo/config"

# Print the curl version/feature list (shows whether HTTP2 is available) and
# the toolchain versions; a missing tool fails the build here.
RUN curl -V \
    && rustc --version \
    && cargo --version \
    && cmake --version

# EasyRec runtime and extra requirements, resolved through the Aliyun PyPI mirror.
RUN pip install --index-url http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com --requirement /EasyRec/requirements/runtime.txt
RUN pip install --index-url http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com --requirement /EasyRec/requirements/extra.txt

# Prebuilt common_io wheel (cp27), installed into the user site, then graphlearn.
RUN pip install --user --upgrade https://tfsmoke1.oss-cn-zhangjiakou.aliyuncs.com/tunnel_paiio/common_io/py2/common_io-0.1.0-cp27-cp27mu-linux_x86_64.whl
RUN pip install graphlearn

# Install EasyRec from the staged sources, then remove the staging directory.
RUN cd /EasyRec \
    && python setup.py install \
    && cd / \
    && rm -rf /EasyRec
# Import check: the build fails if any of these modules cannot be imported.
RUN python -c "import easy_rec; import pyhive; import datahub; import kafka"

COPY docker/hadoop_env.sh /opt/hadoop_env.sh
36 changes: 36 additions & 0 deletions docker/Dockerfile_tf115
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
FROM datascience-registry.cn-beijing.cr.aliyuncs.com/tensorflow/tensorflow:1.15.5

# Use the apt source list shipped in the repository.
COPY docker/sources_18.04.list /etc/apt/sources.list

# The md5sum call is kept from the original recipe, whose author notes it is
# necessary for later commands to take effect.
RUN md5sum /etc/apt/sources.list

# apt-get update and install share one layer so a cached, stale package index
# is never reused when the package list changes; the index is removed in the
# same layer to keep it out of the image.
RUN apt-get update \
    && apt-get install -y \
        apt-utils \
        curl \
        g++ \
        gcc \
        inetutils-ping \
        libpq-dev \
        libsasl2-dev \
        openjdk-8-jdk \
        strace \
        telnet \
        unzip \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Stage the EasyRec sources under /EasyRec for the install steps further down.
RUN mkdir /EasyRec
# Top-level packaging files, copied together.
COPY MANIFEST.in README.md requirements.txt setup.cfg setup.py /EasyRec/
# Directory trees keep their own destination directories.
COPY requirements /EasyRec/requirements
COPY scripts /EasyRec/scripts
COPY easy_rec /EasyRec/easy_rec/

# Install the odpscmd command line client under /usr/local/odps_clt and expose
# it on PATH through a symlink.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the zip; download, unpack and cleanup share one layer so the archive
# does not stay in the image.
RUN curl --fail --location "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip \
    && mkdir /usr/local/odps_clt/ \
    && cd /usr/local/odps_clt/ \
    && unzip /EasyRec/odpscmd_public.zip \
    && rm /EasyRec/odpscmd_public.zip \
    && ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd
RUN pip3 install --upgrade pip
# Extra Python packages from the Aliyun PyPI mirror; the index and trusted-host
# options are passed once (they were duplicated).
RUN pip3 install pystack-debugger idna kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# NOTE(review): runtime.txt is installed without the mirror options used by the
# neighbouring steps — confirm this is intentional.
RUN pip3 install -r /EasyRec/requirements/runtime.txt
RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# Prebuilt wheels (cp36 / pure Python) hosted on OSS; the mirror options apply
# to any dependencies pip has to resolve for them.
RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/graphlearn-1.1.0-cp36-cp36m-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/common_io-0.3.0-cp36-cp36m-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN pip3 install tensorflow_probability==0.8
# Install EasyRec from the staged sources, then remove the staging directory.
# NOTE(review): this step calls `pip`/`python` while the others call `pip3` —
# confirm they resolve to the same interpreter in the base image.
RUN cd /EasyRec && pip install .
RUN rm -rf /EasyRec
# Import check: the build fails if any of these modules cannot be imported.
RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka"

COPY docker/hadoop_env.sh /opt/hadoop_env.sh
1 change: 1 addition & 0 deletions docker/Dockerfile_tf210
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ RUN pip3 install -r /EasyRec/requirements/runtime.txt -i http://mirrors.aliyun
RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN pip3 install https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/graphlearn-1.1.0-cp38-cp38-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN pip3 install tensorflow_probability==0.18.0
RUN pip3 install https://dlc-task.oss-cn-hangzhou.aliyuncs.com/whl/common_io-0.4.1%2Btunnel-py2.py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN cd /EasyRec && python setup.py install
RUN rm -rf /EasyRec
Expand Down
36 changes: 36 additions & 0 deletions docker/Dockerfile_tf212
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
FROM tensorflow/tensorflow:2.12.0
# Use the apt source list shipped in the repository.
COPY docker/sources_20.04.list /etc/apt/sources.list

# The md5sum call is kept from the original recipe, whose author notes it is
# necessary for later commands to take effect.
RUN md5sum /etc/apt/sources.list

# apt-get update and install share one layer so a cached, stale package index
# is never reused when the package list changes; the index is removed in the
# same layer to keep it out of the image.
RUN apt-get update \
    && apt-get install -y \
        apt-utils \
        curl \
        g++ \
        gcc \
        inetutils-ping \
        libpq-dev \
        libsasl2-dev \
        openjdk-8-jdk \
        strace \
        telnet \
        unzip \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Stage the EasyRec sources under /EasyRec for the install steps further down.
RUN mkdir /EasyRec
# Top-level packaging files, copied together.
COPY MANIFEST.in README.md requirements.txt setup.cfg setup.py /EasyRec/
# Directory trees keep their own destination directories.
COPY requirements /EasyRec/requirements
COPY scripts /EasyRec/scripts
COPY easy_rec /EasyRec/easy_rec/

# Install the odpscmd command line client under /usr/local/odps_clt and expose
# it on PATH through a symlink.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the zip; download, unpack and cleanup share one layer so the archive
# does not stay in the image.
RUN curl --fail --location "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip \
    && mkdir /usr/local/odps_clt/ \
    && cd /usr/local/odps_clt/ \
    && unzip /EasyRec/odpscmd_public.zip \
    && rm /EasyRec/odpscmd_public.zip \
    && ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd

# Extra Python packages from the Aliyun PyPI mirror; the index and trusted-host
# options are passed once (they were duplicated).
RUN pip3 install pystack-debugger idna kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# EasyRec runtime and extra requirements.
RUN pip3 install -r /EasyRec/requirements/runtime.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# Prebuilt graphlearn wheel (cp38) hosted on OSS.
RUN pip3 install https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/graphlearn-1.1.0-cp38-cp38-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# Disabled: pai_automl wheel.
# RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
RUN pip3 install tensorflow_probability==0.20.0
#RUN pip3 install encodings
RUN pip3 install https://dlc-task.oss-cn-hangzhou.aliyuncs.com/whl/common_io-0.4.1%2Btunnel-py2.py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# Install EasyRec from the staged sources, then remove the staging directory.
RUN cd /EasyRec && python setup.py install
RUN rm -rf /EasyRec
# Disabled: import check of the installed packages.
# RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka"

COPY docker/hadoop_env.sh /opt/hadoop_env.sh
1 change: 0 additions & 1 deletion docs/source/feature/feature.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ RawFeature:连续值特征
features {
input_names: "ctr"
feature_type: RawFeature
embedding_dim: 8
}
}

Expand Down
34 changes: 31 additions & 3 deletions docs/source/quick_start/local_tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,39 @@

#### 本地Anaconda安装

温馨提示:**在搭载Apple芯片的MacBook上必须使用TensorFlow 2.5或更高版本**。

Demo实验中使用的环境为 `python=3.6.8` + `tensorflow=1.12.0`

```bash
conda create -n py36_tf12 python=3.6.8
conda activate py36_tf12
pip install tensorflow==1.12.0
pip install tensorflow_probability==0.5.0
```

注意:必须要安装`tensorflow_probability`包,需要根据tensorflow的版本安装对应版本的`tensorflow_probability`包。

常见版本对应关系:

| TensorFlow版本 | TensorFlowProbability版本 |
|--------------|-------------------------|
| 1.12 | 0.5.0 |
| 1.15 | 0.8.0 |
| 2.5.0 | 0.13.0 |
| 2.6.0 | 0.14.0 |
| 2.7.0 | 0.15.0 |
| 2.8.0 | 0.16.0 |
| 2.10 | 0.18.0 |
| 2.12 | 0.20.0 |

其他版本对应关系请查看链接:[Releases · tensorflow/probability](https://github.com/tensorflow/probability/releases)。

```bash
git clone https://github.com/alibaba/EasyRec.git
cd EasyRec
bash scripts/init.sh
python setup.py install

```

#### Docker镜像启动
Expand All @@ -33,13 +52,22 @@ Docker的环境为`python=3.6.9` + `tenserflow=1.15.5`
```bash
git clone https://github.com/alibaba/EasyRec.git
cd EasyRec
docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.7.4
docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.7.4
docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.8.5
docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.8.5
docker exec -it <CONTAINER_ID> bash
```

##### 方法二:自行构建Docker镜像

我们提供四个版本的tensorflow镜像构建示例,对应的脚本路径如下:

- scripts/build_docker_tf112.sh
- scripts/build_docker_tf115.sh
- scripts/build_docker_tf210.sh
- scripts/build_docker_tf212.sh

默认使用`tensorflow 1.15`的版本,示例脚本如下,请根据需要替换脚本路径:

```bash
git clone https://github.com/alibaba/EasyRec.git
cd EasyRec
Expand Down
4 changes: 2 additions & 2 deletions docs/source/vector_retrieve.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pai -name easy_rec_ext -project algo_public_dev

## 使用示例

### 1. 创建查询表
### 1. 创建索引表

```sql
create table doc_table(pk BIGINT,vector string) partitioned by (pt string);
Expand All @@ -53,7 +53,7 @@ VALUES
;
```

### 2. 创建索引表
### 2. 创建查询表

```sql
create table query_table(pk BIGINT,vector string) partitioned by (pt string);
Expand Down
3 changes: 3 additions & 0 deletions easy_rec/python/tools/add_boundaries_to_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import json
import logging
import os
import sys

import common_io
import tensorflow as tf

from easy_rec.python.utils import config_util
from easy_rec.python.utils import io_util

if tf.__version__ >= '2.0':
tf = tf.compat.v1
Expand Down Expand Up @@ -61,4 +63,5 @@ def main(argv):


if __name__ == '__main__':
sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
tf.app.run()
3 changes: 3 additions & 0 deletions easy_rec/python/tools/add_feature_info_to_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import json
import logging
import os
import sys

import tensorflow as tf

from easy_rec.python.utils import config_util
from easy_rec.python.utils import io_util
from easy_rec.python.utils.hive_utils import HiveUtils

if tf.__version__ >= '2.0':
Expand Down Expand Up @@ -139,4 +141,5 @@ def main(argv):


if __name__ == '__main__':
sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
tf.app.run()
3 changes: 3 additions & 0 deletions easy_rec/python/tools/faiss_index_pai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import logging
import os
import sys

import faiss
import numpy as np
import tensorflow as tf
from easy_rec.python.utils import io_util

logging.basicConfig(
level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s')
Expand Down Expand Up @@ -109,4 +111,5 @@ def main(argv):


if __name__ == '__main__':
sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
tf.app.run()
3 changes: 3 additions & 0 deletions easy_rec/python/tools/feature_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import json
import os
import sys
from collections import OrderedDict

import numpy as np
Expand All @@ -11,6 +12,7 @@
from tensorflow.python.framework.meta_graph import read_meta_graph_file

from easy_rec.python.utils import config_util
from easy_rec.python.utils import io_util

if tf.__version__ >= '2.0':
tf = tf.compat.v1
Expand Down Expand Up @@ -299,6 +301,7 @@ def _visualize_feature_importance(self, feature_importance, group_name):


if __name__ == '__main__':
sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
if FLAGS.model_type == 'variational_dropout':
fs = VariationalDropoutFS(
FLAGS.config_path,
Expand Down
3 changes: 3 additions & 0 deletions easy_rec/python/tools/hit_rate_ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
import json
import logging
import os
import sys

import graphlearn as gl
import tensorflow as tf

from easy_rec.python.protos.dataset_pb2 import DatasetConfig
from easy_rec.python.utils import config_util
from easy_rec.python.utils import io_util
from easy_rec.python.utils.config_util import process_multi_file_input_path
from easy_rec.python.utils.hit_rate_utils import compute_hitrate_batch
from easy_rec.python.utils.hit_rate_utils import load_graph
Expand Down Expand Up @@ -217,4 +219,5 @@ def main():


if __name__ == '__main__':
sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
main()
3 changes: 3 additions & 0 deletions easy_rec/python/tools/hit_rate_pai.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
from __future__ import division
from __future__ import print_function

import sys
import tensorflow as tf

from easy_rec.python.utils import io_util
from easy_rec.python.utils.hit_rate_utils import compute_hitrate_batch
from easy_rec.python.utils.hit_rate_utils import load_graph
from easy_rec.python.utils.hit_rate_utils import reduce_hitrate
Expand Down Expand Up @@ -131,4 +133,5 @@ def main():


if __name__ == '__main__':
sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
main()
3 changes: 3 additions & 0 deletions easy_rec/python/tools/pre_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import json
import logging
import os
import sys

import tensorflow as tf

from easy_rec.python.input.input import Input
from easy_rec.python.utils import config_util
from easy_rec.python.utils import fg_util
from easy_rec.python.utils import io_util
from easy_rec.python.utils.check_utils import check_env_and_input_path
from easy_rec.python.utils.check_utils import check_sequence

Expand Down Expand Up @@ -114,4 +116,5 @@ def main(argv):


if __name__ == '__main__':
sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
tf.app.run()
Loading
Loading