From 3e59112b8b98626a6519890febd8a66e39dfbf0c Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:40:34 +0000 Subject: [PATCH 1/3] Update DTensor docs, lint notebooks --- .../distribute/dtensor_keras_tutorial.ipynb | 53 ++++++++++--------- .../distribute/dtensor_ml_tutorial.ipynb | 36 ++++++------- 2 files changed, 46 insertions(+), 43 deletions(-) diff --git a/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb index 8785800943a..8f30707fb98 100644 --- a/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb +++ b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb @@ -69,15 +69,16 @@ }, "source": [ "## Overview\n", - "In this tutoral, you will learn how to use DTensor with Keras.\n", + "\n", + "In this tutorial, you will learn how to use DTensors with Keras.\n", "\n", "Through DTensor integration with Keras, you can reuse your existing Keras layers and models to build and train distributed machine learning models.\n", "\n", "You will train a multi-layer classification model with the MNIST data. Setting the layout for subclassing model, Sequential model, and functional model will be demonstrated.\n", "\n", - "This tutoral assumes that you have already read the [DTensor programing guide](/guide/dtensor_overview), and are familiar with basic DTensor concepts like `Mesh` and `Layout`.\n", + "This tutorial assumes that you have already read the [DTensor programing guide](/guide/dtensor_overview), and are familiar with basic DTensor concepts like `Mesh` and `Layout`.\n", "\n", - "This tutoral is based on https://www.tensorflow.org/datasets/keras_example." + "This tutorial is based on [Training a neural network on MNIST with Keras](https://www.tensorflow.org/datasets/keras_example)." ] }, { @@ -88,7 +89,9 @@ "source": [ "## Setup\n", "\n", - "DTensor is part of TensorFlow 2.9.0 release." 
+ "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow and TensorFlow Datasets:" ] }, { @@ -99,7 +102,7 @@ }, "outputs": [], "source": [ - "!pip install --quiet --upgrade --pre tensorflow tensorflow-datasets" + "!pip install --quiet --upgrade tensorflow tensorflow-datasets" ] }, { @@ -108,9 +111,9 @@ "id": "VttBMZngDx8x" }, "source": [ - "Next, import `tensorflow` and `tensorflow.experimental.dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "Next, import `tensorflow` and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", "\n", - "Even though this example uses CPUs, DTensor works the same way on CPU, GPU or TPU devices." + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." ] }, { @@ -176,11 +179,11 @@ "source": [ "## Creating a Data Parallel Mesh\n", "\n", - "This tutorial demonstrates Data Parallel training. Adapting to Model Parallel training and Spatial Parallel training can be as simple as switching to a different set of `Layout` objects. Refer to [DTensor in-depth ML Tutorial](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) for more information on distributed training beyond Data Parallel.\n", + "This tutorial demonstrates Data Parallel training. Adapting to Model Parallel training and Spatial Parallel training can be as simple as switching to a different set of `Layout` objects. 
Refer to the [Distributed training with DTensors](dtensor_ml_tutorial.ipynb) tutorial for more information on distributed training beyond Data Parallel.\n", "\n", - "Data Parallel training is a commonly used parallel training scheme, also used by for example `tf.distribute.MirroredStrategy`.\n", + "Data Parallel training is a commonly used parallel training scheme, also used by, for example, `tf.distribute.MirroredStrategy`.\n", "\n", - "With DTensor, a Data Parallel training loop uses a `Mesh` that consists of a single 'batch' dimension, where each device runs a replica of the model that receives a shard from the global batch.\n" + "With DTensor, a Data Parallel training loop uses a `Mesh` that consists of a single 'batch' dimension, where each device runs a replica of the model that receives a shard from the global batch." ] }, { @@ -248,7 +251,7 @@ "\n", "In order to configure the layout information for your layers' weights, Keras has exposed an extra parameter in the layer constructor for most of the built-in layers.\n", "\n", - "The following example builds a small image classification model with fully replicated weight layout. You can specify layout information `kernel` and `bias` in `tf.keras.layers.Dense` via argument `kernel_layout` and `bias_layout`. Most of the built-in keras layers are ready for explicitly specifying the `Layout` for the layer weights." + "The following example builds a small image classification model with fully replicated weight layout. You can specify layout information `kernel` and `bias` in `tf.keras.layers.Dense` via arguments `kernel_layout` and `bias_layout`. Most of the built-in keras layers are ready for explicitly specifying the `Layout` for the layer weights." ] }, { @@ -315,7 +318,7 @@ "source": [ "## Load a dataset and build input pipeline\n", "\n", - "Load a MNIST dataset and configure some pre-processing input pipeline for it. The dataset itself is not associated with any DTensor layout information. 
There are plans to improve DTensor Keras integration with `tf.data` in future TensorFlow releases.\n" + "Load the MNIST dataset and configure a pre-processing input pipeline for it. The dataset itself is not associated with any DTensor layout information." ] }, { @@ -389,9 +392,9 @@ "source": [ "## Define the training logic for the model\n", "\n", - "Next define the training and evalution logic for the model. \n", + "Next, define the training and evaluation logic for the model. \n", "\n", - "As of TensorFlow 2.9, you have to write a custom-training-loop for a DTensor enabled Keras model. This is to pack the input data with proper layout information, which is not integrated with the standard `tf.keras.Model.fit()` or `tf.keras.Model.eval()` functions from Keras. you will get more `tf.data` support in the upcoming release. " + "As of TensorFlow 2.9, you have to write a custom training loop for a DTensor-enabled Keras model. This is to pack the input data with proper layout information, which is not integrated with the standard `tf.keras.Model.fit()` or `tf.keras.Model.evaluate()` functions from Keras. You will get more `tf.data` support in the upcoming release. " ] }, { @@ -467,7 +470,7 @@ "id": "9Eb-qIJGrxB9" }, "source": [ - "## Metrics and Optimizers\n", + "## Metrics and optimizers\n", "\n", "When using DTensor API with Keras `Metric` and `Optimizer`, you will need to provide the extra mesh information, so that any internal state variables and tensors can work with variables in the model.\n", "\n", @@ -497,9 +500,9 @@ "source": [ "## Train the model\n", "\n", - "The following example shards the data from input pipeline on the batch dimension, and train with the model, which has fully replicated weights. \n", + "The following example demonstrates how to shard the data from the input pipeline on the batch dimension, and train the model, which has fully replicated weights. \n", "\n", - "With 3 epochs, the model should achieve about 97% of accuracy." 
+ "After 3 epochs, the model should achieve about 97% accuracy:" ] }, { @@ -561,13 +564,13 @@ "\n", "Often you have models that work well for your use case. Specifying `Layout` information to each individual layer within the model will be a large amount of work requiring a lot of edits.\n", "\n", - "To help you easily convert your existing Keras model to work with DTensor API you can use the new `dtensor.LayoutMap` API that allow you to specify the `Layout` from a global point of view.\n", + "To help you easily convert your existing Keras model to work with the DTensor API, you can use the new `tf.keras.dtensor.experimental.LayoutMap` API that allows you to specify the `Layout` from a global point of view.\n", "\n", "First, you need to create a `LayoutMap` instance, which is a dictionary-like object that contains all the `Layout` you would like to specify for your model weights.\n", "\n", "`LayoutMap` needs a `Mesh` instance at init, which can be used to provide default replicated `Layout` for any weights that doesn't have Layout configured. In case you would like all your model weights to be just fully replicated, you can provide empty `LayoutMap`, and the default mesh will be used to create replicated `Layout`.\n", "\n", - "`LayoutMap` uses a string as key and a `Layout` as value. There is a behavior difference between a normal Python dict and this class. The string key will be treated as a regex when retrieving the value" + "`LayoutMap` uses a string as key and a `Layout` as value. There is a behavior difference between a normal Python dict and this class. The string key will be treated as a regex when retrieving the value." ] }, { @@ -616,9 +619,9 @@ "* `model.feature_2.kernel`\n", "* `model.feature_2.bias`\n", "\n", - "Note: For Subclassed Models, the attribute name, rather than the `.name` attribute of layer are used as the key to retrieve the Layout from the mapping. This is consistent with the convention followed by `tf.Module` checkpointing. 
For complex models with more than a few layers, you can [manually inspect checkpoints](https://www.tensorflow.org/guide/checkpoint#manually_inspecting_checkpoints) to see the attribute mappings. \n", + "Note: For subclassed Models, the attribute name, rather than the `.name` attribute of the layer, is used as the key to retrieve the Layout from the mapping. This is consistent with the convention followed by `tf.Module` checkpointing. For complex models with more than a few layers, you can [manually inspect checkpoints](https://www.tensorflow.org/guide/checkpoint#manually_inspecting_checkpoints) to view the attribute mappings. \n", "\n", - "Now define the following `LayoutMap` and apply it to the model." + "Now define the following `LayoutMap` and apply it to the model:" ] }, { @@ -644,7 +647,7 @@ "id": "M32HcSp_PyWs" }, "source": [ - "The model weights are created on the first call, so call the model with a DTensor input and confirm the weights have the expected layouts." + "The model weights are created on the first call, so call the model with a DTensor input and confirm the weights have the expected layouts:" ] }, { @@ -686,9 +689,9 @@ "id": "6zzvTqAR2Teu" }, "source": [ - "For keras functional and sequential models, you can use `LayoutMap` as well.\n", + "For Keras Functional and Sequential models, you can use `tf.keras.dtensor.experimental.LayoutMap` as well.\n", "\n", - "Note: For functional and sequential models, the mappings are slightly different. The layers in the model don't have a public attribute attached to the model (though you can access them via `model.layers` as a list). Use the string name as the key in this case. The string name is guaranteed to be unique within a model." + "Note: For Functional and Sequential models, the mappings are slightly different. The layers in the model don't have a public attribute attached to the model (though you can access them via `Model.layers` as a list). Use the string name as the key in this case. 
The string name is guaranteed to be unique within a model." ] }, { @@ -745,7 +748,7 @@ "metadata": { "colab": { "name": "dtensor_keras_tutorial.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb index 9661c5fefbc..2e76f0a2fe1 100644 --- a/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb +++ b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb @@ -37,7 +37,7 @@ "id": "MfBg1C5NB3X0" }, "source": [ - "# Distributed Training with DTensors\n" + "# Distributed training with DTensors" ] }, { @@ -70,25 +70,22 @@ "source": [ "## Overview\n", "\n", - "DTensor provides a way for you to distribute the training of your model across devices to improve efficiency, reliability and scalability. For more details on DTensor concepts, see [The DTensor Programming Guide](https://www.tensorflow.org/guide/dtensor_overview).\n", + "DTensor provides a way for you to distribute the training of your model across devices to improve efficiency, reliability and scalability. For more details, check out the [DTensor concepts](../guide/dtensor_overview.ipynb) guide.\n", "\n", - "In this tutorial, you will train a Sentiment Analysis model with DTensor. Three distributed training schemes are demonstrated with this example:\n", + "In this tutorial, you will train a sentiment analysis model using DTensors. The example demonstrates three distributed training schemes:\n", "\n", " - Data Parallel training, where the training samples are sharded (partitioned) to devices.\n", " - Model Parallel training, where the model variables are sharded to devices.\n", - " - Spatial Parallel training, where the features of input data are sharded to devices. 
(Also known as [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus))\n", + " - Spatial Parallel training, where the features of input data are sharded to devices (also known as [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus)).\n", "\n", - "The training portion of this tutorial is inspired [A Kaggle guide on Sentiment Analysis](https://www.kaggle.com/code/anasofiauzsoy/yelp-review-sentiment-analysis-tensorflow-tfds/notebook) notebook. To learn about the complete training and evaluation workflow (without DTensor), refer to that notebook.\n", + "The training portion of this tutorial is inspired by a Kaggle notebook called [A Kaggle guide on sentiment analysis](https://www.kaggle.com/code/anasofiauzsoy/yelp-review-sentiment-analysis-tensorflow-tfds/notebook). To learn about the complete training and evaluation workflow (without DTensor), refer to that notebook.\n", "\n", "This tutorial will walk through the following steps:\n", "\n", - "- First start with some data cleaning to obtain a `tf.data.Dataset` of tokenized sentences and their polarity.\n", - "\n", - "- Next build an MLP model with custom Dense and BatchNorm layers. Use a `tf.Module` to track the inference variables. The model constructor takes additional `Layout` arguments to control the sharding of variables.\n", - "\n", - "- For training, you will first use data parallel training together with `tf.experimental.dtensor`'s checkpoint feature. 
Then continue with Model Parallel Training and Spatial Parallel Training.\n", - "\n", - "- The final section briefly describes the interaction between `tf.saved_model` and `tf.experimental.dtensor` as of TensorFlow 2.9.\n" + "- Some data cleaning to obtain a `tf.data.Dataset` of tokenized sentences and their polarity.\n", + "- Then, building an MLP model with custom Dense and BatchNorm layers using a `tf.Module` to track the inference variables. The model constructor will take additional `Layout` arguments to control the sharding of variables.\n", + "- For training, you will first use data parallel training together with `tf.experimental.dtensor`'s checkpoint feature. Then, you will continue with Model Parallel Training and Spatial Parallel Training.\n", + "- The final section briefly describes the interaction between `tf.saved_model` and `tf.experimental.dtensor` as of TensorFlow 2.9." ] }, { @@ -99,7 +96,9 @@ "source": [ "## Setup\n", "\n", - "DTensor is part of TensorFlow 2.9.0 release." + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow and TensorFlow Datasets:" ] }, { @@ -110,7 +109,7 @@ }, "outputs": [], "source": [ - "!pip install --quiet --upgrade --pre tensorflow tensorflow-datasets" + "!pip install --quiet --upgrade tensorflow tensorflow-datasets" ] }, { @@ -119,9 +118,9 @@ "id": "tcxP4_Zu7ciQ" }, "source": [ - "Next, import `tensorflow` and `tensorflow.experimental.dtensor`. Then configure TensorFlow to use 8 virtual CPUs.\n", + "Next, import `tensorflow` and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", "\n", - "Even though this example uses CPUs, DTensor works the same way on CPU, GPU or TPU devices." + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." 
] }, { @@ -139,6 +138,7 @@ "import tensorflow as tf\n", "\n", "from tensorflow.experimental import dtensor\n", + "\n", "print('TensorFlow version:', tf.__version__)" ] }, @@ -170,7 +170,7 @@ "source": [ "## Download the dataset\n", "\n", - "Download the IMDB reviews data set to train the sentiment analysis model." + "Download the IMDB reviews data set to train the sentiment analysis model:" ] }, { @@ -1058,7 +1058,7 @@ "colab": { "collapsed_sections": [], "name": "dtensor_ml_tutorial.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", From a3d095ee65d1626e5b9ff92463fb865ca791740b Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:50:31 +0000 Subject: [PATCH 2/3] Update DTensor docs, lint notebooks --- site/en/guide/dtensor_overview.ipynb | 61 +++++++++++++++------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/site/en/guide/dtensor_overview.ipynb b/site/en/guide/dtensor_overview.ipynb index 252b84e7741..0142749501e 100644 --- a/site/en/guide/dtensor_overview.ipynb +++ b/site/en/guide/dtensor_overview.ipynb @@ -37,7 +37,7 @@ "id": "VcQIa1uG86Wh" }, "source": [ - "# DTensor Concepts" + "# DTensor concepts" ] }, { @@ -76,7 +76,7 @@ "\n", "By decoupling the application from sharding directives, DTensor enables running the same application on a single device, multiple devices, or even multiple clients, while preserving its global semantics.\n", "\n", - "This guide introduces DTensor concepts for distributed computing, and how DTensor integrates with TensorFlow. To see a demo of using DTensor in model training, see [Distributed training with DTensor](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) tutorial." + "This guide introduces DTensor concepts for distributed computing, and how DTensor integrates with TensorFlow. 
For a demo of using DTensor in model training, refer to the [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb) tutorial." ] }, { @@ -87,7 +87,9 @@ "source": [ "## Setup\n", "\n", - "DTensor is part of TensorFlow 2.9.0 release, and also included in the TensorFlow nightly builds since 04/09/2022." + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow:" ] }, { @@ -98,7 +100,7 @@ }, "outputs": [], "source": [ - "!pip install --quiet --upgrade --pre tensorflow" + "!pip install --quiet --upgrade tensorflow" ] }, { @@ -107,9 +109,9 @@ "id": "O3pG29uZIWYO" }, "source": [ - "Once installed, import `tensorflow` and `tf.experimental.dtensor`. Then configure TensorFlow to use 6 virtual CPUs.\n", + "Then, import `tensorflow` and `dtensor`, and configure TensorFlow to use 6 virtual CPUs.\n", "\n", - "Even though this example uses vCPUs, DTensor works the same way on CPU, GPU or TPU devices." + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." ] }, { @@ -343,7 +345,7 @@ "id": "TTalu6M-ISYb" }, "source": [ - "### Single-Client and Multi-Client Applications\n", + "### Single-client and multi-client applications\n", "\n", "DTensor supports both single-client and multi-client applications. The colab Python kernel is an example of a single client DTensor application, where there is a single Python process.\n", "\n", @@ -365,7 +367,8 @@ "source": [ "## DTensor as a sharded tensor\n", "\n", - "Now let's start coding with `DTensor`. The helper function, `dtensor_from_array`, demonstrates creating DTensors from something that looks like a `tf.Tensor`. The function performs 2 steps:\n", + "Now, start coding with `DTensor`. The helper function, `dtensor_from_array`, demonstrates creating DTensors from something that looks like a `tf.Tensor`. 
The function performs two steps:\n", + "\n", " - Replicates the tensor to every device on the mesh.\n", " - Shards the copy according to the layout requested in its arguments." ] @@ -410,7 +413,7 @@ " - A `Layout`, which defines the `Mesh` the `Tensor` belongs to, and how the `Tensor` is sharded onto the `Mesh`.\n", " - A list of **component tensors**, one item per local device in the `Mesh`.\n", "\n", - "With `dtensor_from_array`, you can create your first DTensor, `my_first_dtensor`, and examine its contents." + "With `dtensor_from_array`, you can create your first DTensor, `my_first_dtensor`, and examine its contents:" ] }, { @@ -426,7 +429,7 @@ "\n", "my_first_dtensor = dtensor_from_array([0, 1], layout)\n", "\n", - "# Examine the dtensor content\n", + "# Examine the DTensor content\n", "print(my_first_dtensor)\n", "print(\"global shape:\", my_first_dtensor.shape)\n", "print(\"dtype:\", my_first_dtensor.dtype)" @@ -440,7 +443,7 @@ "source": [ "#### Layout and `fetch_layout`\n", "\n", - "The layout of a DTensor is not a regular attribute of `tf.Tensor`. Instead, DTensor provides a function, `dtensor.fetch_layout` to access the layout of a DTensor." + "The layout of a DTensor is not a regular attribute of `tf.Tensor`. Instead, DTensor provides a function, `dtensor.fetch_layout` to access the layout of a DTensor:" ] }, { @@ -499,7 +502,7 @@ "source": [ "The inverse operation of `dtensor.unpack` is `dtensor.pack`. Component tensors can be packed back into a DTensor.\n", "\n", - "The components must have the same rank and dtype, which will be the rank and dtype of the returned DTensor. However there is no strict requirement on the device placement of component tensors as inputs of `dtensor.unpack`: the function will automatically copy the component tensors to their respective corresponding devices.\n" + "The components must have the same rank and dtype, which will be the rank and dtype of the returned DTensor. 
However, there is no strict requirement on the device placement of component tensors as inputs of `dtensor.pack`: the function will automatically copy the component tensors to their respective corresponding devices.\n" ] }, { @@ -528,7 +531,7 @@ "\n", "So far you've worked with the `my_first_dtensor`, which is a rank-1 DTensor fully replicated across a dim-1 `Mesh`.\n", "\n", - "Next create and inspect DTensors that are sharded across a dim-2 `Mesh`. The next example does this with a 3x2 `Mesh` on 6 CPU devices, where size of mesh dimension `'x'` is 3 devices, and size of mesh dimension`'y'` is 2 devices." + "Next, create and inspect DTensors that are sharded across a dim-2 `Mesh`. The following example does this with a 3x2 `Mesh` on 6 CPU devices, where size of mesh dimension `'x'` is 3 devices, and size of mesh dimension `'y'` is 2 devices:" ] }, { @@ -620,7 +623,7 @@ " - 1st axis sharded along the `'x'` mesh dimension.\n", " - 2nd axis replicated along the `'y'` mesh dimension.\n", "\n", - "To achieve this sharding scheme, you just need to replace the sharding spec of the 2nd axis from `'y'` to `dtensor.UNSHARDED`, to indicate your intention of replicating along the 2nd axis. The layout object will look like `Layout(['x', dtensor.UNSHARDED], mesh)`." + "To achieve this sharding scheme, you just need to replace the sharding spec of the 2nd axis from `'y'` to `dtensor.UNSHARDED`, to indicate your intention of replicating along the 2nd axis. The layout object will look like `Layout(['x', dtensor.UNSHARDED], mesh)`:" ] }, { @@ -659,7 +662,7 @@ "source": [ "#### Tensor.numpy() and sharded DTensor\n", "\n", - "Be aware that calling the `.numpy()` method on a sharded DTensor raises an error. The rationale for erroring is to protect against unintended gathering of data from multiple computing devices to the host CPU device backing the returned numpy array." + "Be aware that calling the `.numpy()` method on a sharded DTensor raises an error. 
The rationale for erroring is to protect against unintended gathering of data from multiple computing devices to the host CPU device backing the returned NumPy array:" ] }, { @@ -704,8 +707,9 @@ "Note: DTensor is still an experimental API which means you will be exploring and pushing the boundaries and limits of the DTensor programming model.\n", "\n", "There are 2 ways of triggering DTensor execution:\n", - " - DTensor as operands of a Python function, e.g. `tf.matmul(a, b)` will run through DTensor if `a`, `b`, or both are DTensors.\n", - " - Requesting the result of a Python function to be a DTensor, e.g. `dtensor.call_with_layout(tf.ones, layout, shape=(3, 2))` will run through DTensor because we requested the output of tf.ones to be sharded according to a `layout`." + "\n", + " - DTensor as operands of a Python function, such as `tf.matmul(a, b)`, will run through DTensor if `a`, `b`, or both are DTensors.\n", + " - Requesting the result of a Python function to be a DTensor, such as `dtensor.call_with_layout(tf.ones, layout, shape=(3, 2))`, will run through DTensor because we requested the output of `tf.ones` to be sharded according to a `layout`." ] }, { @@ -714,7 +718,7 @@ "id": "urKzmqAoPssT" }, "source": [ - "### DTensor as Operands\n", + "### DTensor as operands\n", "\n", "Many TensorFlow API functions take `tf.Tensor` as their operands, and returns `tf.Tensor` as their results. For these functions, you can express intention to run a function through DTensor by passing in DTensor as operands. This section uses `tf.matmul(a, b)` as an example." 
] @@ -755,7 +759,7 @@ "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)\n", "print(\"components:\")\n", "for component_tensor in dtensor.unpack(c):\n", - " print(component_tensor.device, component_tensor.numpy())\n" + " print(component_tensor.device, component_tensor.numpy())" ] }, { @@ -800,11 +804,10 @@ "id": "IhD8yYgJiCEh" }, "source": [ - "#### Additional Sharding\n", + "#### Additional sharding\n", "\n", "You can perform additional sharding on the inputs, and they are appropriately carried over to the results. For example, you can apply additional sharding of operand `a` along its first axis to the `'y'` mesh dimension. The additional sharding will be carried over to the first axis of the result `c`.\n", "\n", - "\n", "Total number of floating point mul operations is `6 devices * 2 result * 1 = 12`, an additional factor of 2 reduction compared to the case (24) above. The factor of 2 is due to the sharding along `y` mesh dimension with a size of `2` devices." ] }, @@ -837,11 +840,11 @@ "id": "c-1NazCVmLWZ" }, "source": [ - "### DTensor as Output\n", + "### DTensor as output\n", "\n", - "What about Python functions that do not take operands, but returns a Tensor result that can be sharded? Examples of such functions are\n", + "What about Python functions that do not take operands, but return a Tensor result that can be sharded? Examples of such functions are:\n", "\n", - " - `tf.ones`, `tf.zeros`, `tf.random.stateless_normal`,\n", + " - `tf.ones`, `tf.zeros`, `tf.random.stateless_normal`\n", "\n", "For these Python functions, DTensor provides `dtensor.call_with_layout` which eagerly executes a Python function with DTensor, and ensures that the returned Tensor is a DTensor with the requested `Layout`." ] @@ -876,7 +879,7 @@ "source": [ "#### APIs that emit a single TensorFlow Op\n", "\n", - "If a function emits a single TensorFlow Op, you can directly apply `dtensor.call_with_layout` to the function." 
+ "If a function emits a single TensorFlow Op, you can directly apply `dtensor.call_with_layout` to the function:" ] }, { @@ -911,7 +914,7 @@ "source": [ "#### APIs that emit multiple TensorFlow Ops\n", "\n", - "If the API emits multiple TensorFlow Ops, convert the function into a single Op through `tf.function`. For example `tf.random.stateleess_normal`" + "If the API emits multiple TensorFlow Ops, convert the function into a single Op through `tf.function`. For example, `tf.random.stateless_normal`:" ] }, { @@ -1030,7 +1033,7 @@ "id": "QxBdNHWSu-kV" }, "source": [ - "You can also assign a DTensor to a DVariable.\n" + "You can also assign a DTensor to a DVariable:\n" ] }, { @@ -1051,7 +1054,7 @@ "id": "4fvSk_VUvGnj" }, "source": [ - "Attempting to mutate the layout of a `DVariable`, by assigning a DTensor with an incompatible layout produces an error." + "Attempting to mutate the layout of a `DVariable` by assigning a DTensor with an incompatible layout produces an error:" ] }, { @@ -1081,7 +1084,7 @@ "source": [ "## What's next?\n", "\n", - "In this colab, you learned about DTensor, an extension to TensorFlow for distributed computing. To try out these concepts in a tutorial, see [Distributed training with DTensor](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial)." + "In this colab, you learned about DTensor, an extension to TensorFlow for distributed computing. To try out these concepts in a tutorial, check out [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb)." 
] } ], From 9e063019f274168d5b0ccef09c4509172b81dc88 Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Fri, 29 Sep 2023 21:43:19 +0000 Subject: [PATCH 3/3] Update DTensor docs, lint notebooks --- site/en/guide/dtensor_overview.ipynb | 24 +------------------ .../distribute/dtensor_keras_tutorial.ipynb | 6 ++--- .../distribute/dtensor_ml_tutorial.ipynb | 4 ++-- 3 files changed, 6 insertions(+), 28 deletions(-) diff --git a/site/en/guide/dtensor_overview.ipynb b/site/en/guide/dtensor_overview.ipynb index 0142749501e..95a50f3465f 100644 --- a/site/en/guide/dtensor_overview.ipynb +++ b/site/en/guide/dtensor_overview.ipynb @@ -89,29 +89,7 @@ "\n", "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", "\n", - "First, install or upgrade TensorFlow:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OKaPw8vwwZAC" - }, - "outputs": [], - "source": [ - "!pip install --quiet --upgrade tensorflow" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "O3pG29uZIWYO" - }, - "source": [ - "Then, import `tensorflow` and `dtensor`, and configure TensorFlow to use 6 virtual CPUs.\n", - "\n", - "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." + "Begin by importing TensorFlow and `dtensor`, and then configure TensorFlow to use 6 virtual CPUs. Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." 
] }, { diff --git a/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb index 8f30707fb98..84f6478c2b5 100644 --- a/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb +++ b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb @@ -91,7 +91,7 @@ "\n", "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", "\n", - "First, install or upgrade TensorFlow and TensorFlow Datasets:" + "First, install or upgrade TensorFlow Datasets:" ] }, { @@ -102,7 +102,7 @@ }, "outputs": [], "source": [ - "!pip install --quiet --upgrade tensorflow tensorflow-datasets" + "!pip install --quiet --upgrade tensorflow-datasets" ] }, { @@ -111,7 +111,7 @@ "id": "VttBMZngDx8x" }, "source": [ - "Next, import `tensorflow` and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "Next, import TensorFlow and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", "\n", "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." ] diff --git a/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb index 2e76f0a2fe1..3c02800b6b4 100644 --- a/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb +++ b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb @@ -98,7 +98,7 @@ "\n", "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", "\n", - "First, install or upgrade TensorFlow and TensorFlow Datasets:" + "First, install or upgrade TensorFlow Datasets:" ] }, { @@ -109,7 +109,7 @@ }, "outputs": [], "source": [ - "!pip install --quiet --upgrade tensorflow tensorflow-datasets" + "!pip install --quiet --upgrade tensorflow-datasets" ] }, {