From 60129a0bf46a271571c6bee21bfcb30978530354 Mon Sep 17 00:00:00 2001 From: ues1df <140159870+ues1df@users.noreply.github.com> Date: Wed, 16 Aug 2023 17:27:55 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E7=9A=84=E9=87=8F=E5=8C=96=E9=80=9A?= =?UTF-8?q?=E8=AE=AF=E7=AE=97=E6=B3=95=E5=92=8C=E7=A4=BA=E4=BE=8B=20(#813)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: 增加浮点数量化方案 * doc: 在拆分学习中使用通讯压缩算法 * doc: 优化示例排版 * doc: 优化排版 * feat: 增加quantizedkmeans和fp8-E5M2 * bugfix: fp8实现修改 * bugfix: 修改qfp代码,添加qfp测试 * doc: 加入压缩模型损失和时间对比 * update: 修改测试代码格式 * update: sort imports --- .../SL_Training_with_compressor.ipynb | 964 ++++++++++++++++++ secretflow/utils/compressor.py | 115 +++ tests/utils/test_compressor.py | 45 + 3 files changed, 1124 insertions(+) create mode 100644 docs/tutorial/SL_Training_with_compressor.ipynb create mode 100644 tests/utils/test_compressor.py diff --git a/docs/tutorial/SL_Training_with_compressor.ipynb b/docs/tutorial/SL_Training_with_compressor.ipynb new file mode 100644 index 000000000..a941f7f1a --- /dev/null +++ b/docs/tutorial/SL_Training_with_compressor.ipynb @@ -0,0 +1,964 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 在拆分学习中使用通讯压缩方法\n", + "\n", + "> 以下代码仅作为示例,请勿在生产环境直接使用。\n", + "\n", + "> 本示例基于“拆分学习:银行营销”教程制作,建议先阅读该教程。\n", + "\n", + "在拆分学习中,由于模型被拆分在多个设备当中,进行训练的时候,各方需要对特征和梯度进行多次传输,带来很高的网络通讯消耗。为了减少通讯过程中的数据量,可以进行一些压缩处理。\n", + "\n", + "SecretFlow提供了Compressor对拆分学习中的数据进行压缩。同时也提供了多种基类,可以在此基础上实现自己的压缩算法。\n", + "\n", + "下面我们来试试一些算法的可用性,首先,我们在secretflow环境中创造2个实体alice和bob。" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-16 01:43:59,294\tWARNING services.py:1732 -- WARNING: The object store is using /tmp instead of /dev/shm because /dev/shm has only 67108864 bytes available. This will harm performance! 
You may be able to free up space by deleting files in /dev/shm. If you are inside a Docker container, you can increase /dev/shm size by passing '--shm-size=3.92gb' to 'docker run' (or add it to the run_options list in a Ray cluster config). Make sure to set this to more than 30% of available RAM.\n", + "2023-08-16 01:43:59,444\tINFO worker.py:1538 -- Started a local Ray instance.\n" + ] + } + ], + "source": [ + "import secretflow as sf\n", + "\n", + "sf.shutdown()\n", + "sf.init(['alice', 'bob'], address='local')\n", + "alice, bob = sf.PYU('alice'), sf.PYU('bob')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "接下来我们准备要学习的数据。\n", + "\n", + "我们使用“拆分学习:银行营销”中的数据准备和处理方法,下载银行营销数据集并进行处理。alice和bob的角色和之前的教程完全相同:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(_run pid=27337)\u001b[0m /usr/local/lib/python3.8/site-packages/sklearn/base.py:443: UserWarning: X has feature names, but MinMaxScaler was fitted without feature names\n", + "\u001b[2m\u001b[36m(_run pid=27337)\u001b[0m warnings.warn(\n", + "\u001b[2m\u001b[36m(_run pid=27337)\u001b[0m /usr/local/lib/python3.8/site-packages/sklearn/base.py:443: UserWarning: X has feature names, but MinMaxScaler was fitted without feature names\n", + "\u001b[2m\u001b[36m(_run pid=27337)\u001b[0m warnings.warn(\n" + ] + } + ], + "source": [ + "from secretflow.utils.simulation.datasets import load_bank_marketing\n", + "from secretflow.preprocessing.scaler import MinMaxScaler\n", + "from secretflow.preprocessing.encoder import LabelEncoder\n", + "from secretflow.data.split import train_test_split\n", + "\n", + "random_state = 1234\n", + "\n", + "data = load_bank_marketing(parts={alice: (0, 4), bob: (4, 16)}, axis=1)\n", + "label = load_bank_marketing(parts={alice: (16, 17)}, axis=1)\n", + "\n", + "encoder = LabelEncoder()\n", + "data['job'] = encoder.fit_transform(data['job'])\n", 
+ "data['marital'] = encoder.fit_transform(data['marital'])\n", + "data['education'] = encoder.fit_transform(data['education'])\n", + "data['default'] = encoder.fit_transform(data['default'])\n", + "data['housing'] = encoder.fit_transform(data['housing'])\n", + "data['loan'] = encoder.fit_transform(data['loan'])\n", + "data['contact'] = encoder.fit_transform(data['contact'])\n", + "data['poutcome'] = encoder.fit_transform(data['poutcome'])\n", + "data['month'] = encoder.fit_transform(data['month'])\n", + "label = encoder.fit_transform(label)\n", + "\n", + "scaler = MinMaxScaler()\n", + "data = scaler.fit_transform(data)\n", + "\n", + "train_data,test_data = train_test_split(data, train_size=0.8, random_state=random_state)\n", + "train_label,test_label = train_test_split(label, train_size=0.8, random_state=random_state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "接下来我们创建联邦模型,同样地,我们使用“拆分学习:银行营销”中的建模,构建出base_model和fuse_model,然后就可以定义SLModel用于训练:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-16 01:44:03.512175: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-08-16 01:44:04.209189: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-08-16 01:44:04.209381: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-08-16 01:44:04.209397: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT 
Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "INFO:root:Create proxy actor with party alice.\n", + "INFO:root:Create proxy actor with party bob.\n" + ] + } + ], + "source": [ + "def create_base_model(input_dim, output_dim, name='base_model'):\n", + " # Create model\n", + " def create_model():\n", + " from tensorflow import keras\n", + " from tensorflow.keras import layers\n", + " import tensorflow as tf\n", + " model = keras.Sequential(\n", + " [\n", + " keras.Input(shape=input_dim),\n", + " layers.Dense(100,activation =\"relu\" ),\n", + " layers.Dense(output_dim, activation=\"relu\"),\n", + " ]\n", + " )\n", + " # Compile model\n", + " model.summary()\n", + " model.compile(loss='binary_crossentropy',\n", + " optimizer='adam',\n", + " metrics=[\"accuracy\",tf.keras.metrics.AUC()])\n", + " return model\n", + " return create_model\n", + "\n", + "# prepare model\n", + "hidden_size = 64\n", + "\n", + "model_base_alice = create_base_model(4, hidden_size)\n", + "model_base_bob = create_base_model(12, hidden_size)\n", + "\n", + "def create_fuse_model(input_dim, output_dim, party_nums, name='fuse_model'):\n", + " def create_model():\n", + " from tensorflow import keras\n", + " from tensorflow.keras import layers\n", + " import tensorflow as tf\n", + " # input\n", + " input_layers = []\n", + " for i in range(party_nums):\n", + " input_layers.append(keras.Input(input_dim,))\n", + "\n", + " merged_layer = layers.concatenate(input_layers)\n", + " fuse_layer = layers.Dense(64, activation='relu')(merged_layer)\n", + " output = layers.Dense(output_dim, activation='sigmoid')(fuse_layer)\n", + "\n", + " model = keras.Model(inputs=input_layers, outputs=output)\n", + " model.summary()\n", + "\n", + " model.compile(loss='binary_crossentropy',\n", + " optimizer='adam',\n", + " metrics=[\"accuracy\",tf.keras.metrics.AUC()])\n", + " return model\n", + 
" return create_model\n", + "\n", + "model_fuse = create_fuse_model(\n", + " input_dim=hidden_size, party_nums=2, output_dim=1)\n", + "\n", + "base_model_dict = {\n", + " alice: model_base_alice,\n", + " bob: model_base_bob\n", + "}\n", + "\n", + "\n", + "\n", + "from secretflow.ml.nn import SLModel\n", + "sl_model_origin = SLModel(\n", + " base_model_dict=base_model_dict,\n", + " device_y=alice,\n", + " model_fuse=model_fuse,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 使用通讯压缩算法\n", + "\n", + "SecretFlow提供了Compressor,里面实现了各种基础的通讯压缩算法,可以直接使用。\n", + "\n", + "只要导入想使用的压缩算法并实例化,定义SLModel时将实例化的方法作为参数传入就可以在训练中实现通讯压缩。\n", + "\n", + "我们以QuantizedFP为例,该算法会将浮点数量化到8位以降低传输消耗。" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Create proxy actor with party alice.\n", + "INFO:root:Create proxy actor with party bob.\n" + ] + } + ], + "source": [ + "from secretflow.utils.compressor import QuantizedFP\n", + "\n", + "qfp = QuantizedFP()\n", + "\n", + "sl_model_compress = SLModel(\n", + " base_model_dict=base_model_dict,\n", + " device_y=alice,\n", + " model_fuse=model_fuse,\n", + " compressor = qfp #在这里传入实例化的compressor算法\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "我们分别对没有使用通讯压缩的模型和使用了量化压缩的模型进行训练,并把训练轮次拉高到40轮,看看效果如何。" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:SL Train Params: {'x': VDataFrame(partitions={PYURuntime(alice): Partition(data=), PYURuntime(bob): Partition(data=)}, aligned=True), 'y': VDataFrame(partitions={PYURuntime(alice): Partition(data=)}, aligned=True), 'batch_size': 128, 'epochs': 40, 'verbose': 1, 'callbacks': None, 'validation_data': (VDataFrame(partitions={PYURuntime(alice): Partition(data=), PYURuntime(bob): Partition(data=)}, 
aligned=True), VDataFrame(partitions={PYURuntime(alice): Partition(data=)}, aligned=True)), 'shuffle': True, 'sample_weight': None, 'validation_freq': 1, 'dp_spent_step_freq': None, 'dataset_builder': None, 'audit_log_params': {}, 'random_seed': 11819, 'audit_log_dir': None, 'self': }\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(pid=28114)\u001b[0m 2023-08-16 01:44:08.296739: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28127)\u001b[0m 2023-08-16 01:44:08.551930: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28181)\u001b[0m 2023-08-16 01:44:08.767248: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28235)\u001b[0m 2023-08-16 01:44:09.014466: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28114)\u001b[0m 2023-08-16 01:44:09.160525: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28114)\u001b[0m 2023-08-16 01:44:09.160694: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 
'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28114)\u001b[0m 2023-08-16 01:44:09.160713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "\u001b[2m\u001b[36m(pid=28127)\u001b[0m 2023-08-16 01:44:09.418021: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28127)\u001b[0m 2023-08-16 01:44:09.418136: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28127)\u001b[0m 2023-08-16 01:44:09.418152: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "\u001b[2m\u001b[36m(pid=28181)\u001b[0m 2023-08-16 01:44:09.654066: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28181)\u001b[0m 2023-08-16 01:44:09.654235: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28181)\u001b[0m 2023-08-16 01:44:09.654257: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "\u001b[2m\u001b[36m(pid=28235)\u001b[0m 2023-08-16 01:44:09.871219: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28235)\u001b[0m 2023-08-16 01:44:09.871317: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=28235)\u001b[0m 2023-08-16 01:44:09.871333: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m 2023-08-16 01:44:11.224977: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m 2023-08-16 01:44:11.225041: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Model: \"sequential\"\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m _________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Layer (type) Output Shape Param # \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m dense (Dense) (None, 100) 500 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m dense_1 (Dense) (None, 64) 6464 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Total params: 6,964\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Trainable params: 6,964\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Non-trainable params: 0\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m _________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Model: 
\"model\"\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m __________________________________________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Layer (type) Output Shape Param # Connected to \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m ==================================================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m input_2 (InputLayer) [(None, 64)] 0 [] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m input_3 (InputLayer) [(None, 64)] 0 [] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m concatenate (Concatenate) (None, 128) 0 ['input_2[0][0]', \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m 'input_3[0][0]'] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m dense_2 (Dense) (None, 64) 8256 ['concatenate[0][0]'] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m dense_3 (Dense) (None, 1) 65 ['dense_2[0][0]'] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m ==================================================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Total params: 8,321\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Trainable params: 8,321\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m Non-trainable params: 0\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28114)\u001b[0m __________________________________________________________________________________________________\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": 
[ + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m 2023-08-16 01:44:11.487105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m 2023-08-16 01:44:11.487150: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m Model: \"sequential\"\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m _________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m Layer (type) Output Shape Param # \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m dense (Dense) (None, 100) 1300 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m dense_1 (Dense) (None, 64) 6464 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m Total params: 7,764\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m Trainable params: 7,764\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m Non-trainable params: 0\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=28127)\u001b[0m _________________________________________________________________\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/29 [00:00), PYURuntime(bob): Partition(data=)}, aligned=True), 'y': VDataFrame(partitions={PYURuntime(alice): 
Partition(data=)}, aligned=True), 'batch_size': 128, 'epochs': 40, 'verbose': 1, 'callbacks': None, 'validation_data': (VDataFrame(partitions={PYURuntime(alice): Partition(data=), PYURuntime(bob): Partition(data=)}, aligned=True), VDataFrame(partitions={PYURuntime(alice): Partition(data=)}, aligned=True)), 'shuffle': True, 'sample_weight': None, 'validation_freq': 1, 'dp_spent_step_freq': None, 'dataset_builder': None, 'audit_log_params': {}, 'random_seed': 50480, 'audit_log_dir': None, 'self': }\n", + "100%|██████████| 29/29 [00:03<00:00, 7.41it/s, epoch: 1/40 - train_loss:0.4217776954174042 train_accuracy:0.8659462332725525 train_auc_1:0.5435447692871094 val_loss:0.40626364946365356 val_accuracy:0.8729282021522522 val_auc_1:0.5905393362045288 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.59it/s, epoch: 2/40 - train_loss:0.3423333764076233 train_accuracy:0.8874446749687195 train_auc_1:0.6285374164581299 val_loss:0.3637339770793915 val_accuracy:0.8729282021522522 val_auc_1:0.670577883720398 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 16.02it/s, epoch: 3/40 - train_loss:0.31453219056129456 train_accuracy:0.8949353694915771 train_auc_1:0.6967648267745972 val_loss:0.35318124294281006 val_accuracy:0.8729282021522522 val_auc_1:0.7181453108787537 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.70it/s, epoch: 4/40 - train_loss:0.2924026548862457 train_accuracy:0.8968473672866821 train_auc_1:0.771354079246521 val_loss:0.3476685583591461 val_accuracy:0.8729282021522522 val_auc_1:0.7567088603973389 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.96it/s, epoch: 5/40 - train_loss:0.3236430585384369 train_accuracy:0.8690732717514038 train_auc_1:0.8049758076667786 val_loss:0.32425957918167114 val_accuracy:0.8729282021522522 val_auc_1:0.8028783798217773 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.77it/s, epoch: 6/40 - train_loss:0.2683410346508026 train_accuracy:0.8920454382896423 train_auc_1:0.8347899317741394 val_loss:0.3059132695198059 
val_accuracy:0.8696132302284241 val_auc_1:0.8184260129928589 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.99it/s, epoch: 7/40 - train_loss:0.24226166307926178 train_accuracy:0.9022727012634277 train_auc_1:0.850990891456604 val_loss:0.30843329429626465 val_accuracy:0.8729282021522522 val_auc_1:0.832201361656189 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.66it/s, epoch: 8/40 - train_loss:0.23420202732086182 train_accuracy:0.9053977131843567 train_auc_1:0.8667846322059631 val_loss:0.2918694317340851 val_accuracy:0.8795580267906189 val_auc_1:0.8382883071899414 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.87it/s, epoch: 9/40 - train_loss:0.24281850457191467 train_accuracy:0.8993362784385681 train_auc_1:0.8600778579711914 val_loss:0.28592929244041443 val_accuracy:0.8773480653762817 val_auc_1:0.8522564172744751 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 16.07it/s, epoch: 10/40 - train_loss:0.25411662459373474 train_accuracy:0.8985795378684998 train_auc_1:0.8763052225112915 val_loss:0.27862876653671265 val_accuracy:0.8795580267906189 val_auc_1:0.8518161773681641 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.97it/s, epoch: 11/40 - train_loss:0.2467927783727646 train_accuracy:0.9008620977401733 train_auc_1:0.8637750148773193 val_loss:0.27538853883743286 val_accuracy:0.8850829005241394 val_auc_1:0.8585635423660278 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.81it/s, epoch: 12/40 - train_loss:0.24046260118484497 train_accuracy:0.9030172228813171 train_auc_1:0.8943703174591064 val_loss:0.2793208956718445 val_accuracy:0.8872928023338318 val_auc_1:0.8582884073257446 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.74it/s, epoch: 13/40 - train_loss:0.2232421338558197 train_accuracy:0.9109513163566589 train_auc_1:0.9031308889389038 val_loss:0.27965837717056274 val_accuracy:0.8773480653762817 val_auc_1:0.857512354850769 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.98it/s, epoch: 14/40 - train_loss:0.2226562350988388 train_accuracy:0.9120911359786987 
train_auc_1:0.8835855722427368 val_loss:0.28520363569259644 val_accuracy:0.8806629776954651 val_auc_1:0.854595422744751 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.90it/s, epoch: 15/40 - train_loss:0.23515889048576355 train_accuracy:0.904902994632721 train_auc_1:0.8961691856384277 val_loss:0.28021782636642456 val_accuracy:0.8850829005241394 val_auc_1:0.8563291430473328 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.79it/s, epoch: 16/40 - train_loss:0.23402053117752075 train_accuracy:0.9024784564971924 train_auc_1:0.8906980752944946 val_loss:0.27909329533576965 val_accuracy:0.8850829005241394 val_auc_1:0.859708309173584 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.93it/s, epoch: 17/40 - train_loss:0.2111150622367859 train_accuracy:0.9189712405204773 train_auc_1:0.8960785865783691 val_loss:0.27899590134620667 val_accuracy:0.8817679286003113 val_auc_1:0.8576114177703857 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.81it/s, epoch: 18/40 - train_loss:0.20241659879684448 train_accuracy:0.915678858757019 train_auc_1:0.9157640933990479 val_loss:0.28282174468040466 val_accuracy:0.8784530162811279 val_auc_1:0.8583985567092896 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.87it/s, epoch: 19/40 - train_loss:0.23259153962135315 train_accuracy:0.9071022868156433 train_auc_1:0.8956990242004395 val_loss:0.2828892171382904 val_accuracy:0.8817679286003113 val_auc_1:0.8546835780143738 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.82it/s, epoch: 20/40 - train_loss:0.22440506517887115 train_accuracy:0.9034845232963562 train_auc_1:0.8989371657371521 val_loss:0.28058287501335144 val_accuracy:0.8784530162811279 val_auc_1:0.8598239421844482 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.94it/s, epoch: 21/40 - train_loss:0.23205137252807617 train_accuracy:0.9051724076271057 train_auc_1:0.8899630308151245 val_loss:0.2741439938545227 val_accuracy:0.8795580267906189 val_auc_1:0.8636598587036133 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.72it/s, epoch: 22/40 - 
train_loss:0.22656284272670746 train_accuracy:0.9030172228813171 train_auc_1:0.907919704914093 val_loss:0.2767719030380249 val_accuracy:0.8839778900146484 val_auc_1:0.8592514991760254 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.98it/s, epoch: 23/40 - train_loss:0.22055070102214813 train_accuracy:0.9109228849411011 train_auc_1:0.913796067237854 val_loss:0.2815714180469513 val_accuracy:0.8928176760673523 val_auc_1:0.855701744556427 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.60it/s, epoch: 24/40 - train_loss:0.23475250601768494 train_accuracy:0.9043141603469849 train_auc_1:0.9076265692710876 val_loss:0.2773815095424652 val_accuracy:0.8839778900146484 val_auc_1:0.8560759425163269 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 16.07it/s, epoch: 25/40 - train_loss:0.2359710931777954 train_accuracy:0.9005681872367859 train_auc_1:0.9041743278503418 val_loss:0.28951746225357056 val_accuracy:0.8806629776954651 val_auc_1:0.8590589165687561 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.76it/s, epoch: 26/40 - train_loss:0.21646590530872345 train_accuracy:0.9094827771186829 train_auc_1:0.9059643745422363 val_loss:0.27530720829963684 val_accuracy:0.8806629776954651 val_auc_1:0.8600990772247314 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.82it/s, epoch: 27/40 - train_loss:0.21936063468456268 train_accuracy:0.9137930870056152 train_auc_1:0.9077043533325195 val_loss:0.2782182991504669 val_accuracy:0.8861878514289856 val_auc_1:0.8611392974853516 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 16.03it/s, epoch: 28/40 - train_loss:0.21766482293605804 train_accuracy:0.9098451137542725 train_auc_1:0.9155865907669067 val_loss:0.2878170311450958 val_accuracy:0.8806629776954651 val_auc_1:0.8582608103752136 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.72it/s, epoch: 29/40 - train_loss:0.2088153064250946 train_accuracy:0.9126105904579163 train_auc_1:0.9115303754806519 val_loss:0.28278136253356934 val_accuracy:0.8817679286003113 val_auc_1:0.8566923141479492 ]\n", + 
"100%|██████████| 29/29 [00:01<00:00, 15.74it/s, epoch: 30/40 - train_loss:0.2089204490184784 train_accuracy:0.9156526327133179 train_auc_1:0.9117385149002075 val_loss:0.2774920165538788 val_accuracy:0.8861878514289856 val_auc_1:0.8575839996337891 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.52it/s, epoch: 31/40 - train_loss:0.20840761065483093 train_accuracy:0.9170354008674622 train_auc_1:0.909948468208313 val_loss:0.29270535707473755 val_accuracy:0.8817679286003113 val_auc_1:0.8584149479866028 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.67it/s, epoch: 32/40 - train_loss:0.21289651095867157 train_accuracy:0.9139933586120605 train_auc_1:0.9137017130851746 val_loss:0.2861045300960541 val_accuracy:0.8872928023338318 val_auc_1:0.8621078729629517 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 16.06it/s, epoch: 33/40 - train_loss:0.20959915220737457 train_accuracy:0.9116379022598267 train_auc_1:0.9085273146629333 val_loss:0.2869407832622528 val_accuracy:0.8828729391098022 val_auc_1:0.8602972030639648 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.91it/s, epoch: 34/40 - train_loss:0.20927441120147705 train_accuracy:0.91731196641922 train_auc_1:0.9242825508117676 val_loss:0.2853357195854187 val_accuracy:0.8872928023338318 val_auc_1:0.8595817685127258 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.88it/s, epoch: 35/40 - train_loss:0.21317821741104126 train_accuracy:0.9164719581604004 train_auc_1:0.9171379208564758 val_loss:0.2900511920452118 val_accuracy:0.8795580267906189 val_auc_1:0.8606053590774536 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.76it/s, epoch: 36/40 - train_loss:0.22284917533397675 train_accuracy:0.909375011920929 train_auc_1:0.903253436088562 val_loss:0.2755865752696991 val_accuracy:0.8839778900146484 val_auc_1:0.8629168272018433 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.70it/s, epoch: 37/40 - train_loss:0.19849534332752228 train_accuracy:0.9175646305084229 train_auc_1:0.923616886138916 val_loss:0.289157897233963 
val_accuracy:0.8784530162811279 val_auc_1:0.8603851795196533 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.88it/s, epoch: 38/40 - train_loss:0.20322787761688232 train_accuracy:0.9178650379180908 train_auc_1:0.9194050431251526 val_loss:0.2820649743080139 val_accuracy:0.8850829005241394 val_auc_1:0.8655640482902527 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.71it/s, epoch: 39/40 - train_loss:0.1862594485282898 train_accuracy:0.9291487336158752 train_auc_1:0.9243948459625244 val_loss:0.2956363558769226 val_accuracy:0.8872928023338318 val_auc_1:0.8605613708496094 ]\n", + "100%|██████████| 29/29 [00:01<00:00, 15.88it/s, epoch: 40/40 - train_loss:0.20305216312408447 train_accuracy:0.915409505367279 train_auc_1:0.912611722946167 val_loss:0.2843382656574249 val_accuracy:0.8806629776954651 val_auc_1:0.8598018288612366 ]\n" + ] + } + ], + "source": [ + "histories = []\n", + "for sl_model in [sl_model_origin, sl_model_compress]:\n", + "\n", + " history = sl_model.fit(train_data,\n", + " train_label,\n", + " validation_data=(test_data,test_label),\n", + " epochs=40,\n", + " batch_size=128,\n", + " shuffle=True,\n", + " verbose=1,\n", + " validation_freq=1,\n", + " )\n", + " \n", + " histories.append(history)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "for history in histories:\n", + " plt.plot(history['train_auc_1'])\n", + " plt.plot(history['val_auc_1'])\n", + "\n", + "plt.title('Model Area Under Curve')\n", + "plt.ylabel('Area Under Curve')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['origin', 'origin_val', 'fp8_compressed', 'fp8_compressed_val'], loc='lower right')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "可以看到,两个模型的验证集auc均在0.85左右波动,使用8位量化对此任务的训练精度影响不大,而理论通讯消耗减少了3/4(从32位减少到了8位)。\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 自定义通讯压缩算法\n", + "\n", + "我们也可以自定义一个压缩算法,SecretFlow提供了SparseCompressor和QuantizedCompressor基类,对应稀疏化方法和量化压缩方法。\n", + "\n", + "这里以量化压缩方法为例,来实现一个基于K-means的压缩算法。\n", + "\n", + "K-means压缩论文是\"Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding\"提出的方法中的其中一个步骤,其思想是把对传输参数进行聚类,保存聚类中心的值,然后把其他值用聚类序号来表示。\n", + "\n", + "继承QuantizedCompressor后,只要实现_compress_one(将一个numpy向量打包为QuantizedData) 和 _decompress_one(将QuantizedData还原回numpy向量)函数即可。" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from secretflow.utils.compressor import QuantizedCompressor, QuantizedData\n", + "import numpy as np\n", + "\n", + "class QuantizedMyKmeans(QuantizedCompressor):\n", + "\n", + " def __init__(self, quant_bits: int = 8):\n", + " super().__init__(quant_bits)\n", + " from sklearn.cluster import KMeans\n", + " self.km = KMeans(2**quant_bits, n_init=1, max_iter=50)\n", + "\n", + " def _compress_one(self, data: np.ndarray) -> QuantizedData:\n", + " ori_shape = data.shape\n", + " self.km.fit(np.expand_dims(data.flatten(), axis=1))\n", + " quantized = self.km.labels_ - (1 << (self.quant_bits - 1))\n", + " quantized = np.reshape(quantized, ori_shape)\n", + " q = self.km.cluster_centers_\n", + 
"\n", + " return QuantizedData(quantized.astype(self.np_type), q, None, data.dtype)\n", + "\n", + " def _decompress_one(self, data: QuantizedData) -> np.ndarray:\n", + " label = data.data.astype(data.origin_type) + (1 << (self.quant_bits - 1))\n", + " dequantized = np.zeros_like(label)\n", + " for i in range(data.q1.shape[0]):\n", + " dequantized[label == i] = data.q1[i]\n", + "\n", + " return dequantized" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "我们来实例化这个算法,再跑一遍联邦学习模型:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Create proxy actor with party alice.\n", + "INFO:root:Create proxy actor with party bob.\n", + "INFO:root:SL Train Params: {'x': VDataFrame(partitions={PYURuntime(alice): Partition(data=), PYURuntime(bob): Partition(data=)}, aligned=True), 'y': VDataFrame(partitions={PYURuntime(alice): Partition(data=)}, aligned=True), 'batch_size': 128, 'epochs': 40, 'verbose': 1, 'callbacks': None, 'validation_data': (VDataFrame(partitions={PYURuntime(alice): Partition(data=), PYURuntime(bob): Partition(data=)}, aligned=True), VDataFrame(partitions={PYURuntime(alice): Partition(data=)}, aligned=True)), 'shuffle': True, 'sample_weight': None, 'validation_freq': 1, 'dp_spent_step_freq': None, 'dataset_builder': None, 'audit_log_params': {}, 'random_seed': 91222, 'audit_log_dir': None, 'self': }\n", + "\u001b[2m\u001b[36m(pid=30232)\u001b[0m 2023-08-16 01:46:19.445573: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=30250)\u001b[0m 2023-08-16 01:46:19.623566: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot 
open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=30232)\u001b[0m 2023-08-16 01:46:20.187158: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=30232)\u001b[0m 2023-08-16 01:46:20.187299: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=30232)\u001b[0m 2023-08-16 01:46:20.187315: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "\u001b[2m\u001b[36m(pid=30250)\u001b[0m 2023-08-16 01:46:20.343446: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=30250)\u001b[0m 2023-08-16 01:46:20.343542: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=30250)\u001b[0m 2023-08-16 01:46:20.343556: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m 2023-08-16 01:46:22.002165: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m 2023-08-16 01:46:22.002204: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Model: \"sequential\"\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m _________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Layer (type) Output Shape Param # \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m dense (Dense) (None, 100) 500 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m dense_1 (Dense) (None, 64) 6464 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Total params: 6,964\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Trainable params: 6,964\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Non-trainable params: 0\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m _________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Model: 
\"model\"\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m __________________________________________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Layer (type) Output Shape Param # Connected to \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m ==================================================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m input_2 (InputLayer) [(None, 64)] 0 [] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m input_3 (InputLayer) [(None, 64)] 0 [] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m concatenate (Concatenate) (None, 128) 0 ['input_2[0][0]', \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m 'input_3[0][0]'] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m dense_2 (Dense) (None, 64) 8256 ['concatenate[0][0]'] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m dense_3 (Dense) (None, 1) 65 ['dense_2[0][0]'] \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m ==================================================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Total params: 8,321\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Trainable params: 8,321\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m Non-trainable params: 0\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30232)\u001b[0m __________________________________________________________________________________________________\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": 
[ + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m 2023-08-16 01:46:22.167943: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m 2023-08-16 01:46:22.167979: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m Model: \"sequential\"\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m _________________________________________________________________\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m Layer (type) Output Shape Param # \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m dense (Dense) (None, 100) 1300 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m dense_1 (Dense) (None, 64) 6464 \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m \n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m =================================================================\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m Total params: 7,764\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m Trainable params: 7,764\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m Non-trainable params: 0\n", + "\u001b[2m\u001b[36m(PYUSLTFModel pid=30250)\u001b[0m _________________________________________________________________\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 29/29 [00:13<00:00, 2.39it/s]\u001b[2m\u001b[36m(_run pid=27350)\u001b[0m 
/tmp/ipykernel_25795/600430472.py:13: ConvergenceWarning: Number of distinct clusters (248) found smaller than n_clusters (256). Possibly due to duplicate points in X.\n", + "100%|██████████| 29/29 [00:15<00:00, 1.83it/s, epoch: 1/40 - train_loss:0.4416384696960449 train_accuracy:0.8701704740524292 train_auc_1:0.518036961555481 val_loss:0.40735140442848206 val_accuracy:0.8729282021522522 val_auc_1:0.5592570304870605 ]\n", + "100%|██████████| 29/29 [00:13<00:00, 2.10it/s, epoch: 2/40 - train_loss:0.36653003096580505 train_accuracy:0.8817349076271057 train_auc_1:0.5584944486618042 val_loss:0.3673045337200165 val_accuracy:0.8729282021522522 val_auc_1:0.6474628448486328 ]\n", + "\u001b[2m\u001b[36m(_run pid=27350)\u001b[0m /tmp/ipykernel_25795/600430472.py:13: ConvergenceWarning: Number of distinct clusters (237) found smaller than n_clusters (256). Possibly due to duplicate points in X.\n", + "100%|██████████| 29/29 [00:11<00:00, 2.35it/s]\u001b[2m\u001b[36m(_run pid=27349)\u001b[0m /tmp/ipykernel_25795/600430472.py:13: ConvergenceWarning: Number of distinct clusters (251) found smaller than n_clusters (256). 
Possibly due to duplicate points in X.\n", + "100%|██████████| 29/29 [00:13<00:00, 2.18it/s, epoch: 3/40 - train_loss:0.32427504658699036 train_accuracy:0.890625 train_auc_1:0.6890991926193237 val_loss:0.35910260677337646 val_accuracy:0.8729282021522522 val_auc_1:0.6932856440544128 ]\n", + "100%|██████████| 29/29 [00:13<00:00, 2.23it/s, epoch: 4/40 - train_loss:0.31370726227760315 train_accuracy:0.8875584006309509 train_auc_1:0.7440165281295776 val_loss:0.34864872694015503 val_accuracy:0.8729282021522522 val_auc_1:0.7281122803688049 ]\n", + " 0%| | 0/29 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(history_kmeans['train_auc_1'])\n", + "plt.plot(history_kmeans['val_auc_1'])\n", + "\n", + "plt.title('Model Area Under Curve')\n", + "plt.ylabel('Area Under Curve')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['kmeans','kmeans_val'], loc='lower right')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "最终验证集auc在0.855左右,也还不错~" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 压缩算法的压缩效果\n", + "\n", + "我们在ImageNet预训练的ResNet网络为例,试一下Int8、Fp8和Kmeans方法对模型参数的压缩效果,看看有什么差异。" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. 
You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n", + " warnings.warn(msg)\n" + ] + } + ], + "source": [ + "from secretflow.utils.compressor import QuantizedZeroPoint, QuantizedFP, QuantizedKmeans\n", + "from torchvision import models\n", + "import ssl\n", + "import time\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "ssl._create_default_https_context = ssl._create_unverified_context\n", + "net = models.resnet50(pretrained=True)\n", + "net_params = [p.detach().numpy().flatten() for p in net.parameters()]\n", + "\n", + "coms =[QuantizedZeroPoint(8), QuantizedFP(8, format='E4M3'), QuantizedFP(8, format='E5M2'), QuantizedKmeans(8, n_clusters=100)]\n", + "losses = []\n", + "durations = []\n", + "\n", + "for c in coms:\n", + " start = time.time()\n", + " c_params = c.compress(net_params)\n", + " dc_params = c.decompress(c_params)\n", + " losses.append(sum([np.sum((a-b)**2) for a,b in zip(net_params, dc_params)]))\n", + " durations.append(time.time()-start)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12.8, 4.8))\n", + "x=[1,2,3,4]\n", + "x_label=['Int8','Fp8-E4M3','Fp8-E5M2','Kmeans']\n", + "\n", + "plt.subplot(121)\n", + "p1 = plt.bar(x, losses, color='deepskyblue')\n", + "plt.bar_label(p1, label_type='edge')\n", + "plt.xticks(x, x_label)\n", + "plt.title('SSE loss in compressing ResNet50')\n", + "plt.ylabel('Sum Square Error')\n", + "\n", + "plt.subplot(122)\n", + "p2 = plt.bar(x, durations, color='salmon')\n", + "plt.bar_label(p2, label_type='edge')\n", + "plt.xticks(x, x_label)\n", + "plt.title('Time comsuming in compressing ResNet50')\n", + "plt.ylabel('time')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "可以看到,kmeans压缩在控制精度损失方面表现最好,但压缩时间非常长。\n", + "\n", + "浮点数(Fp8-M4E3)对ResNet模型参数压缩的效果略优于整型(Int8)压缩,时间消耗是整型压缩的3倍。\n", + "\n", + "实际应用压缩算法时,可根据计算资源和压缩精度进行平衡。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 总结\n", + "\n", + "本篇示例介绍了通讯压缩算法,并在拆分学习的基础之上使用了SecretFlow提供和自行设计的压缩算法。\n", + "\n", + "从实验数据可以看出,将32位数压缩为8位的精度损失不大,而理论通信消耗仅为不作压缩时的1/4,因此在需要频繁传输数据和梯度的拆分学习中,加入通讯压缩不失为一个好的选择。\n", + "\n", + "本教程使用明文聚合来做演示,同时没有考虑隐藏层的泄露问题,SecretFlow提供了聚合层AggLayer,通过MPC,TEE,HE,以及DP等方式规避隐层明文传输泄露的问题。如果您感兴趣,可以看相关文档。" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/secretflow/utils/compressor.py b/secretflow/utils/compressor.py index 16e59bf6b..dec65650a 100644 --- a/secretflow/utils/compressor.py +++ b/secretflow/utils/compressor.py @@ -19,6 +19,7 @@ import jax.numpy as jnp import numpy as np 
from scipy import sparse

from secretflow.utils.communicate import ForwardData
from secretflow.utils.errors import InvalidArgumentError

# ---------------------------------------------------------------------------
# secretflow/utils/compressor.py
#
# The two classes below are appended after the existing quantized
# compressors. They rely on ``QuantizedCompressor`` / ``QuantizedData`` from
# the same module and on ``numpy`` being imported as ``np``.
#
# Patch context kept for reference (tail of the preceding compressor class,
# not modified here):
#     def _decompress_one(self, data: QuantizedData) -> np.ndarray:
#         return (data.data.astype(data.origin_type) + float(data.q2)) / float(data.q1)
# ---------------------------------------------------------------------------


class QuantizedFP(QuantizedCompressor):
    """Quantized compressor based on low-bit floating point formats.

    fp16/32/64 are produced by a plain numpy dtype cast. fp8 values are
    rescaled so that the largest magnitude maps onto the format's maximum,
    then packed into integer codes stored with ``self.np_type``.

    Reference paper "FP8 FORMATS FOR DEEP LEARNING".

    Link: https://arxiv.org/pdf/2209.05433.pdf
    """

    def __init__(self, quant_bits: int = 8, format='E4M3'):
        """Create the compressor.

        Args:
            quant_bits: target width, one of 8/16/32/64.
            format: fp8 layout, 'E4M3' or 'E5M2'. Only consulted when
                ``quant_bits == 8``.

        Raises:
            RuntimeError: on an unsupported width, or an unknown fp8 format.
        """
        super().__init__(quant_bits)
        if quant_bits not in [8, 16, 32, 64]:
            raise RuntimeError(
                f"The quantized bits for QuantizedFP must in 8/16/32/64, got {quant_bits}"
            )

        if quant_bits == 8 and format not in ['E4M3', 'E5M2']:
            raise RuntimeError(
                f"The format for fp8 quantized must in E4M3/E5M2, got {format}"
            )
        # max_value : magnitude the largest input is scaled onto
        # mant_len  : number of mantissa steps per exponent value
        # exp_offset: bias added to the frexp exponent before packing
        config = {
            'E4M3': {'max_value': 448, 'mant_len': 8, 'exp_offset': 6},
            'E5M2': {'max_value': 57344, 'mant_len': 4, 'exp_offset': 14},
        }
        # Wider formats never read the fp8 parameters, so an unrelated
        # ``format`` value must not fail with KeyError here.
        self.config = config.get(format)

    def _compress_one(self, data: np.ndarray) -> QuantizedData:
        """Quantize one array.

        For 16/32/64 bits the array is cast to the matching numpy float type.
        For 8 bits the returned ``QuantizedData`` carries the packed codes and
        the scale factor (as ``q1``) needed to undo the rescaling.
        """
        if self.quant_bits > 8:
            # fp 16/32/64
            return QuantizedData(
                data.astype(getattr(np, f'float{self.quant_bits}')),
                None,
                None,
                data.dtype,
            )

        # fp8 with a scale factor, stored as integer codes.
        max_value = self.config['max_value']
        mant_len = self.config['mant_len']
        exp_offset = self.config['exp_offset']

        q_sign = np.sign(data)
        out = np.abs(data)
        # ``initial=0`` keeps an empty array from raising in np.max.
        max_abs = np.max(out, initial=0)
        scale = max_value / (max_abs if max_abs > 0 else 1)
        out = out * scale

        # frexp yields a mantissa in [0.5, 1) for non-zero input (not [1, 2)),
        # hence the ``2 * mant - 1`` remapping onto [0, 1).
        mant, exp = np.frexp(out)
        q_exp = np.where(exp > -exp_offset, exp + exp_offset, 0)
        q_mant = np.round((2 * mant - 1) * mant_len)
        # A mantissa that rounds up to ``mant_len`` carries into the exponent
        # field through this sum.
        code = q_exp * mant_len + q_mant
        # Values below the smallest representable exponent are flushed to
        # zero; packing their mantissa with a clamped exponent would decode
        # to an unrelated, much larger magnitude.
        code = np.where(exp < -exp_offset, 0, code)

        quantized = q_sign * code
        return QuantizedData(quantized.astype(self.np_type), scale, None, data.dtype)

    def _decompress_one(self, data: QuantizedData) -> np.ndarray:
        """Restore an array produced by ``_compress_one``."""
        if self.quant_bits != 8:
            return data.data.astype(data.origin_type)

        # Unpack the fp8 codes back into the original float type.
        mant_len = self.config['mant_len']
        exp_offset = self.config['exp_offset']

        quantized = data.data
        sign = np.sign(quantized)
        abs_quantized = np.abs(quantized)
        exp = (abs_quantized // mant_len) - exp_offset
        mant = (
            (abs_quantized % mant_len).astype(data.origin_type) / mant_len + 1
        ) / 2

        # ``q1`` is the scale applied during compression.
        return sign * np.ldexp(mant, exp) / data.q1


class QuantizedKmeans(QuantizedCompressor):
    """Quantized compressor with K-means: every value is replaced by the
    index of its nearest centroid, and the centroids are shipped alongside.

    Reference paper 2016 "Deep Compression: Compressing Deep Neural Networks
    with Pruning, Trained Quantization and Huffman Coding".

    Link: https://arxiv.org/abs/1510.00149
    """

    def __init__(self, quant_bits: int = 8, n_clusters=None):
        """Create the compressor.

        Args:
            quant_bits: width of the stored centroid index.
            n_clusters: number of centroids; defaults to ``2**quant_bits``.
        """
        super().__init__(quant_bits)
        # Imported lazily so the module can be loaded without scikit-learn.
        from sklearn.cluster import KMeans

        if n_clusters is None:
            self.n_clusters = 2**quant_bits
        else:
            self.n_clusters = n_clusters
        # NOTE(review): indices are stored as ``label - 2**(quant_bits - 1)``
        # in ``self.np_type``; presumably ``n_clusters`` must stay small
        # enough for that type -- confirm against QuantizedCompressor.
        self.km = KMeans(self.n_clusters, n_init=1, max_iter=50)

    def _compress_one(self, data: np.ndarray) -> QuantizedData:
        """Cluster ``data`` and return centroid indices plus centroids.

        Arrays with no more elements than ``n_clusters`` are passed through
        unchanged (``q1`` is ``None`` in that case).
        """
        if data.size <= self.n_clusters:
            return QuantizedData(data, None, None, None)

        # KMeans expects a 2-D (n_samples, n_features) input.
        self.km.fit(data.reshape(-1, 1))
        # Shift labels so they are centred on zero before the narrow cast.
        quantized = self.km.labels_ - (1 << (self.quant_bits - 1))
        quantized = np.reshape(quantized, data.shape)

        return QuantizedData(
            quantized.astype(self.np_type),
            self.km.cluster_centers_,
            None,
            data.dtype,
        )

    def _decompress_one(self, data: QuantizedData) -> np.ndarray:
        """Map centroid indices back to centroid values."""
        # Pass-through payloads carry no centroids; checking ``q1`` avoids
        # depending solely on this instance's ``n_clusters`` setting.
        if data.q1 is None or data.data.size <= self.n_clusters:
            return data.data

        centers = np.asarray(data.q1).reshape(-1)
        # Widen before undoing the shift so the addition cannot overflow.
        index = data.data.astype(np.int64) + (1 << (self.quant_bits - 1))
        return centers[index].astype(data.origin_type, copy=False)


# ---------------------------------------------------------------------------
# tests/utils/test_compressor.py (new file)
# ---------------------------------------------------------------------------
import numpy as np

from secretflow.utils.compressor import (
    ForwardData,
    QuantizedData,
    QuantizedFP,
    QuantizedKmeans,
)


def _sample_matrix(seed=0):
    """Return a reproducible (128, 256) normal sample so failures can be replayed."""
    return np.random.default_rng(seed).normal(0, 5, size=(128, 256))


def compressed_test(compressor):
    """compress() must wrap arrays, lists of arrays and ForwardData.hidden."""
    a = _sample_matrix()
    c_a = compressor.compress(a)
    assert isinstance(c_a, QuantizedData)

    a_list = [a, a, a]
    c_a_list = compressor.compress(a_list)
    assert all(isinstance(x, QuantizedData) for x in c_a_list)

    a_forward = ForwardData(hidden=a)
    c_a_forward = compressor.compress(a_forward)
    assert isinstance(c_a_forward.hidden, QuantizedData)


def abs_max_equal_test(compressor):
    """The largest magnitude must survive a compress/decompress round trip."""
    a = _sample_matrix()
    c_a = compressor.compress(a)
    res = compressor.decompress(c_a)
    np.testing.assert_almost_equal(np.max(np.abs(res)), np.max(np.abs(a)))


def all_zeros_test(compressor):
    """An all-zero array must round-trip exactly."""
    a = np.zeros((128, 256))
    c_a = compressor.compress(a)
    res = compressor.decompress(c_a)
    assert (res == a).all()


def test_qfp():
    # Cover both fp8 layouts, not only the default one.
    for fmt in ('E4M3', 'E5M2'):
        compressor = QuantizedFP(format=fmt)
        compressed_test(compressor)
        abs_max_equal_test(compressor)
        all_zeros_test(compressor)


def test_qfp_underflow():
    # A value far below the fp8 range must decode to ~0, not to a value
    # close to the smallest representable magnitude.
    compressor = QuantizedFP()
    a = np.array([1.0, 1e-9])
    res = compressor.decompress(compressor.compress(a))
    assert abs(res[1]) < 1e-6


def test_qkm():
    compressor = QuantizedKmeans()
    compressed_test(compressor)
    all_zeros_test(compressor)