From cabd0a584633dac1d9bf06c16265bd89a5b4b11a Mon Sep 17 00:00:00 2001
From: Satwik Kumar <113476420+sshiri-msft@users.noreply.github.com>
Date: Tue, 13 Aug 2024 11:56:07 -0700
Subject: [PATCH] updated minimal image version (#3346)

---
 ...ndpoints-custom-container-multimodel.ipynb | 904 +++++++++---------
 .../environment/serving/Dockerfile            |   2 +-
 .../online-endpoints-inference-schema.ipynb   |   6 +-
 .../managed/online-endpoints-keyvault.ipynb   |   2 +-
 .../managed/online-endpoints-multimodel.ipynb |   2 +-
 .../managed/online-endpoints-openapi.ipynb    |   2 +-
 6 files changed, 459 insertions(+), 459 deletions(-)

diff --git a/sdk/python/endpoints/online/custom-container/online-endpoints-custom-container-multimodel.ipynb b/sdk/python/endpoints/online/custom-container/online-endpoints-custom-container-multimodel.ipynb
index e4cdbb9867..079acbbc83 100644
--- a/sdk/python/endpoints/online/custom-container/online-endpoints-custom-container-multimodel.ipynb
+++ b/sdk/python/endpoints/online/custom-container/online-endpoints-custom-container-multimodel.ipynb
@@ -1,453 +1,453 @@
 {
-    "cells": [
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "# Create a multimodel deployment using a custom container"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "In this example we serve two models from the same endpoint and same deployment. \n",
-       "\n",
-       "Both model files are registered as a single model asset on Azure and loaded simultaneously in the scoring script. The scoring script parses each request for a \"model\" field and routes the payload accordingly. \n",
-       "\n",
-       "A custom container is not necessary for multimodel deployment - the custom container used here simply adds Python requirements via `pip` to an Azure Inference Minimal base image. An equivalent multimodel deployment can be created using a conda file-based environment."
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "## 1. Configure parameters, assets, and clients"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 1.1 Set workspace details"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
-       "resource_group = \"<RESOURCE_GROUP>\"\n",
-       "workspace_name = \"<AML_WORKSPACE_NAME>\""
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 1.2 Set endpoint details"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "import random\n",
-       "\n",
-       "endpoint_name = f\"multimod-{random.randint(0,10000)}\""
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 1.3 Set asset paths\n",
-       "Define the directories containing the two model files as well as a directory which contains the scoring script"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "import os\n",
-       "\n",
-       "base_path = \"../../../../../cli/endpoints/online/custom-container/minimal/multimodel\"\n",
-       "models_path = os.path.join(base_path, \"models\")\n",
-       "code_path = os.path.join(base_path, \"code\")\n",
-       "test_data_path = os.path.join(base_path, \"test-data\")"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 1.4 Examine the models folder\n",
-       "The models folder contains two models which will be loaded simultaneously by the scoring script."
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "import os\n",
-       "\n",
-       "os.listdir(models_path)"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 1.5 Examine the scoring script\n",
-       "\n",
-       "The scoring script loads both models into a dictionary keyed on their name in the `init` function. In the run function, each request is parsed for a `model` key in the JSON to choose the model. The `data` payload is then passed to the appropriate model.\n",
-       "\n",
-       "```python \n",
-       "import joblib\n",
-       "import os\n",
-       "import pandas as pd\n",
-       "from pathlib import Path\n",
-       "import json\n",
-       "\n",
-       "models = None\n",
-       "\n",
-       "\n",
-       "def init():\n",
-       "    global models\n",
-       "    model_dir = Path(os.getenv(\"AZUREML_MODEL_DIR\")) / \"models\"\n",
-       "    models = {m[:-4]: joblib.load(model_dir / m) for m in os.listdir(model_dir)}\n",
-       "\n",
-       "\n",
-       "def run(data):\n",
-       "    data = json.loads(data)\n",
-       "    model = models[data[\"model\"]]\n",
-       "    payload = pd.DataFrame(data[\"data\"])\n",
-       "    try:\n",
-       "        ret = model.predict(payload)\n",
-       "        return pd.DataFrame(ret).to_json()\n",
-       "    except KeyError:\n",
-       "        raise KeyError(\"No such model\")\n",
-       "\n",
-       "``` "
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 1.6 Examine the Dockerfile\n",
-       "The dockerfile is located at `base_path` / `minimal-multimodel.dockerfile`. It uses the AzureML Inference Minimal CPU image as a base and adds relevant dependencies for the scoring script.\n",
-       "\n",
-       "```docker\n",
-       "FROM mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference:latest\n",
-       "\n",
-       "RUN pip install pandas numpy scikit-learn joblib\n",
-       "```"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 1.7 Create an MLClient instance"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "from azure.ai.ml import MLClient\n",
-       "from azure.ai.ml.entities import (\n",
-       "    ManagedOnlineEndpoint,\n",
-       "    ManagedOnlineDeployment,\n",
-       "    Model,\n",
-       "    CodeConfiguration,\n",
-       "    Environment,\n",
-       "    BuildContext,\n",
-       "    ProbeSettings,\n",
-       ")\n",
-       "from azure.identity import DefaultAzureCredential\n",
-       "\n",
-       "credential = DefaultAzureCredential()\n",
-       "ml_client = MLClient(\n",
-       "    credential,\n",
-       "    subscription_id=subscription_id,\n",
-       "    resource_group_name=resource_group,\n",
-       "    workspace_name=workspace_name,\n",
-       ")"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "credential = DefaultAzureCredential()\n",
-       "ml_client = MLClient(\n",
-       "    credential,\n",
-       "    subscription_id=subscription_id,\n",
-       "    resource_group_name=resource_group,\n",
-       "    workspace_name=workspace_name,\n",
-       ")"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "## 2. Create an endpoint"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 2.1 Define and create the endpoint"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "endpoint = ManagedOnlineEndpoint(name=endpoint_name)\n",
-       "poller = ml_client.online_endpoints.begin_create_or_update(endpoint)\n",
-       "poller.wait()"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 2.2 Confirm that creation was successful"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "from azure.ai.ml.exceptions import DeploymentException\n",
-       "\n",
-       "status = poller.status()\n",
-       "if status != \"Succeeded\":\n",
-       "    raise DeploymentException(status)\n",
-       "else:\n",
-       "    print(\"Endpoint creation succeeded\")\n",
-       "    endpoint = poller.result()\n",
-       "    print(endpoint)"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "## 3. Create the deployment"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 3.1 Define the deployment"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "deployment = ManagedOnlineDeployment(\n",
-       "    name=\"custom-container-multimodel\",\n",
-       "    endpoint_name=endpoint_name,\n",
-       "    model=Model(name=\"minimal-multimodel\", path=models_path),\n",
-       "    code_configuration=CodeConfiguration(\n",
-       "        code=code_path, scoring_script=\"minimal-multimodel-score.py\"\n",
-       "    ),\n",
-       "    environment=Environment(\n",
-       "        name=\"minimal-multimodel\",\n",
-       "        build=BuildContext(\n",
-       "            path=base_path,\n",
-       "            dockerfile_path=\"minimal-multimodel.dockerfile\",\n",
-       "        ),\n",
-       "        inference_config={\n",
-       "            \"liveness_route\": {\"path\": \"/\", \"port\": 5001},\n",
-       "            \"readiness_route\": {\"path\": \"/\", \"port\": 5001},\n",
-       "            \"scoring_route\": {\"path\": \"/score\", \"port\": 5001},\n",
-       "        },\n",
-       "    ),\n",
-       "    instance_type=\"Standard_DS3_v2\",\n",
-       "    instance_count=1,\n",
-       ")"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 3.2 Create the deployment"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "poller = ml_client.online_deployments.begin_create_or_update(deployment)\n",
-       "poller.wait()"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 3.3 Confirm that creation was successful"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "status = poller.status()\n",
-       "if status != \"Succeeded\":\n",
-       "    raise DeploymentException(status)\n",
-       "else:\n",
-       "    print(\"Deployment creation succeeded\")\n",
-       "    deployment = poller.result()\n",
-       "    print(deployment)"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 3.4 Set traffic to 100% "
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "endpoint.traffic = {\"custom-container-multimodel\": 100}\n",
-       "poller = ml_client.begin_create_or_update(endpoint)\n",
-       "poller.wait()"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "## 4. Test the endpoint\n",
-       "The `model` JSON field in both JSON payloads indicates which model to score."
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 4.1 Test the diabetes model"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "import json\n",
-       "\n",
-       "res = ml_client.online_endpoints.invoke(\n",
-       "    endpoint_name, request_file=os.path.join(test_data_path, \"diabetes-test-data.json\")\n",
-       ")\n",
-       "print(json.loads(res))"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 4.2 Test the iris model"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "res = ml_client.online_endpoints.invoke(\n",
-       "    endpoint_name, request_file=os.path.join(test_data_path, \"iris-test-data.json\")\n",
-       ")\n",
-       "print(json.loads(res))"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "## 5. Delete assets"
-      ]
-     },
-     {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-       "### 5.1 Delete the endpoint"
-      ]
-     },
-     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-       "poller = ml_client.online_endpoints.begin_delete(name=endpoint_name)"
-      ]
-     }
-    ],
-    "metadata": {
-     "kernelspec": {
-      "display_name": "Python 3.10 - SDK V2",
-      "language": "python",
-      "name": "python310-sdkv2"
-     },
-     "language_info": {
-      "codemirror_mode": {
-       "name": "ipython",
-       "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.10.6"
-     },
-     "vscode": {
-      "interpreter": {
-       "hash": "e530ce6154f972640d3e5b626ff5929e0848c7598c5ca98c96181f27d47882a4"
-      }
-     }
-    },
-    "nbformat": 4,
-    "nbformat_minor": 2
-   }
\ No newline at end of file
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create a multimodel deployment using a custom container"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this example we serve two models from the same endpoint and same deployment. \n",
+    "\n",
+    "Both model files are registered as a single model asset on Azure and loaded simultaneously in the scoring script. The scoring script parses each request for a \"model\" field and routes the payload accordingly. \n",
+    "\n",
+    "A custom container is not necessary for multimodel deployment - the custom container used here simply adds Python requirements via `pip` to an AzureML Inference Minimal base image. An equivalent multimodel deployment can be created using a conda file-based environment."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Configure parameters, assets, and clients"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.1 Set workspace details"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
+    "resource_group = \"<RESOURCE_GROUP>\"\n",
+    "workspace_name = \"<AML_WORKSPACE_NAME>\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.2 Set endpoint details"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "endpoint_name = f\"multimod-{random.randint(0,10000)}\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.3 Set asset paths\n",
+    "Define the directories containing the two model files, the scoring script, and the test data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "base_path = \"../../../../../cli/endpoints/online/custom-container/minimal/multimodel\"\n",
+    "models_path = os.path.join(base_path, \"models\")\n",
+    "code_path = os.path.join(base_path, \"code\")\n",
+    "test_data_path = os.path.join(base_path, \"test-data\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.4 Examine the models folder\n",
+    "The models folder contains two models which will be loaded simultaneously by the scoring script."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.listdir(models_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.5 Examine the scoring script\n",
+    "\n",
+    "The scoring script's `init` function loads both models into a dictionary keyed by model name (the file name with its last four characters removed). In the `run` function, each request is parsed for a `model` key in the JSON to choose the model. The `data` payload is then passed to the appropriate model.\n",
+    "\n",
+    "```python \n",
+    "import joblib\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "from pathlib import Path\n",
+    "import json\n",
+    "\n",
+    "models = None\n",
+    "\n",
+    "\n",
+    "def init():\n",
+    "    global models\n",
+    "    model_dir = Path(os.getenv(\"AZUREML_MODEL_DIR\")) / \"models\"\n",
+    "    models = {m[:-4]: joblib.load(model_dir / m) for m in os.listdir(model_dir)}\n",
+    "\n",
+    "\n",
+    "def run(data):\n",
+    "    data = json.loads(data)\n",
+    "    model = models[data[\"model\"]]\n",
+    "    payload = pd.DataFrame(data[\"data\"])\n",
+    "    try:\n",
+    "        ret = model.predict(payload)\n",
+    "        return pd.DataFrame(ret).to_json()\n",
+    "    except KeyError:\n",
+    "        raise KeyError(\"No such model\")\n",
+    "\n",
+    "``` "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.6 Examine the Dockerfile\n",
+    "The Dockerfile is located at `base_path` / `minimal-multimodel.dockerfile`. It uses the AzureML Inference Minimal CPU image as a base and adds relevant dependencies for the scoring script.\n",
+    "\n",
+    "```docker\n",
+    "FROM mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference:latest\n",
+    "\n",
+    "RUN pip install pandas numpy scikit-learn joblib\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.7 Create an MLClient instance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.ai.ml.entities import (\n",
+    "    ManagedOnlineEndpoint,\n",
+    "    ManagedOnlineDeployment,\n",
+    "    Model,\n",
+    "    CodeConfiguration,\n",
+    "    Environment,\n",
+    "    BuildContext,\n",
+    "    ProbeSettings,\n",
+    ")\n",
+    "from azure.identity import DefaultAzureCredential\n",
+    "\n",
+    "credential = DefaultAzureCredential()\n",
+    "ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group,\n",
+    "    workspace_name=workspace_name,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "credential = DefaultAzureCredential()\n",
+    "ml_client = MLClient(\n",
+    "    credential,\n",
+    "    subscription_id=subscription_id,\n",
+    "    resource_group_name=resource_group,\n",
+    "    workspace_name=workspace_name,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Create an endpoint"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.1 Define and create the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "endpoint = ManagedOnlineEndpoint(name=endpoint_name)\n",
+    "poller = ml_client.online_endpoints.begin_create_or_update(endpoint)\n",
+    "poller.wait()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.2 Confirm that creation was successful"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.exceptions import DeploymentException\n",
+    "\n",
+    "status = poller.status()\n",
+    "if status != \"Succeeded\":\n",
+    "    raise DeploymentException(status)\n",
+    "else:\n",
+    "    print(\"Endpoint creation succeeded\")\n",
+    "    endpoint = poller.result()\n",
+    "    print(endpoint)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Create the deployment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.1 Define the deployment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deployment = ManagedOnlineDeployment(\n",
+    "    name=\"custom-container-multimodel\",\n",
+    "    endpoint_name=endpoint_name,\n",
+    "    model=Model(name=\"minimal-multimodel\", path=models_path),\n",
+    "    code_configuration=CodeConfiguration(\n",
+    "        code=code_path, scoring_script=\"minimal-multimodel-score.py\"\n",
+    "    ),\n",
+    "    environment=Environment(\n",
+    "        name=\"minimal-multimodel\",\n",
+    "        build=BuildContext(\n",
+    "            path=base_path,\n",
+    "            dockerfile_path=\"minimal-multimodel.dockerfile\",\n",
+    "        ),\n",
+    "        inference_config={\n",
+    "            \"liveness_route\": {\"path\": \"/\", \"port\": 5001},\n",
+    "            \"readiness_route\": {\"path\": \"/\", \"port\": 5001},\n",
+    "            \"scoring_route\": {\"path\": \"/score\", \"port\": 5001},\n",
+    "        },\n",
+    "    ),\n",
+    "    instance_type=\"Standard_DS3_v2\",\n",
+    "    instance_count=1,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.2 Create the deployment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "poller = ml_client.online_deployments.begin_create_or_update(deployment)\n",
+    "poller.wait()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.3 Confirm that creation was successful"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "status = poller.status()\n",
+    "if status != \"Succeeded\":\n",
+    "    raise DeploymentException(status)\n",
+    "else:\n",
+    "    print(\"Deployment creation succeeded\")\n",
+    "    deployment = poller.result()\n",
+    "    print(deployment)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.4 Set traffic to 100% "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "endpoint.traffic = {\"custom-container-multimodel\": 100}\n",
+    "poller = ml_client.begin_create_or_update(endpoint)\n",
+    "poller.wait()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Test the endpoint\n",
+    "The `model` field in each JSON payload indicates which model should score the request."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.1 Test the diabetes model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "res = ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name, request_file=os.path.join(test_data_path, \"diabetes-test-data.json\")\n",
+    ")\n",
+    "print(json.loads(res))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.2 Test the iris model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "res = ml_client.online_endpoints.invoke(\n",
+    "    endpoint_name, request_file=os.path.join(test_data_path, \"iris-test-data.json\")\n",
+    ")\n",
+    "print(json.loads(res))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Delete assets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5.1 Delete the endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "poller = ml_client.online_endpoints.begin_delete(name=endpoint_name)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10 - SDK V2",
+   "language": "python",
+   "name": "python310-sdkv2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e530ce6154f972640d3e5b626ff5929e0848c7598c5ca98c96181f27d47882a4"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sdk/python/endpoints/online/llm/semantic-kernel/environment/serving/Dockerfile b/sdk/python/endpoints/online/llm/semantic-kernel/environment/serving/Dockerfile
index fa652eedf4..90722a076f 100644
--- a/sdk/python/endpoints/online/llm/semantic-kernel/environment/serving/Dockerfile
+++ b/sdk/python/endpoints/online/llm/semantic-kernel/environment/serving/Dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference:latest
+FROM mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference:latest
 
 COPY . .
 
diff --git a/sdk/python/endpoints/online/managed/online-endpoints-inference-schema.ipynb b/sdk/python/endpoints/online/managed/online-endpoints-inference-schema.ipynb
index 6aa83895f6..f16023f0ca 100644
--- a/sdk/python/endpoints/online/managed/online-endpoints-inference-schema.ipynb
+++ b/sdk/python/endpoints/online/managed/online-endpoints-inference-schema.ipynb
@@ -238,7 +238,7 @@
     "        code=\"inference-schema/code\", scoring_script=\"score-numpy.py\"\n",
     "    ),\n",
     "    environment=Environment(\n",
-    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference\",\n",
+    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference\",\n",
     "        conda_file=\"inference-schema/env.yml\",\n",
     "    ),\n",
     "    instance_type=\"Standard_DS3_v2\",\n",
@@ -381,7 +381,7 @@
     "        code=\"inference-schema/code/\", scoring_script=\"score-standard.py\"\n",
     "    ),\n",
     "    environment=Environment(\n",
-    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference\",\n",
+    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference\",\n",
     "        conda_file=\"inference-schema/env.yml\",\n",
     "    ),\n",
     "    instance_type=\"Standard_DS3_v2\",\n",
@@ -528,7 +528,7 @@
     "        code=\"inference-schema/code\", scoring_script=\"score-pandas.py\"\n",
     "    ),\n",
     "    environment=Environment(\n",
-    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference\",\n",
+    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference\",\n",
     "        conda_file=\"inference-schema/env.yml\",\n",
     "    ),\n",
     "    instance_type=\"Standard_DS3_v2\",\n",
diff --git a/sdk/python/endpoints/online/managed/online-endpoints-keyvault.ipynb b/sdk/python/endpoints/online/managed/online-endpoints-keyvault.ipynb
index 7c94f29116..4f9d43f0a1 100644
--- a/sdk/python/endpoints/online/managed/online-endpoints-keyvault.ipynb
+++ b/sdk/python/endpoints/online/managed/online-endpoints-keyvault.ipynb
@@ -338,7 +338,7 @@
     "    ),\n",
     "    environment=Environment(\n",
     "        conda_file=\"keyvault/env.yml\",\n",
-    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference:latest\",\n",
+    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference:latest\",\n",
     "    ),\n",
     "    environment_variables={\n",
     "        \"KV_SECRET_MULTIPLIER\": f\"multiplier@https://{keyvault_name}.vault.azure.net\"\n",
diff --git a/sdk/python/endpoints/online/managed/online-endpoints-multimodel.ipynb b/sdk/python/endpoints/online/managed/online-endpoints-multimodel.ipynb
index 3ec12f7fc4..26e4f379fb 100644
--- a/sdk/python/endpoints/online/managed/online-endpoints-multimodel.ipynb
+++ b/sdk/python/endpoints/online/managed/online-endpoints-multimodel.ipynb
@@ -282,7 +282,7 @@
    "source": [
     "environment = Environment(\n",
     "    name=\"minimal-multimodel-conda\",\n",
-    "    image=\"mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference\",\n",
+    "    image=\"mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference\",\n",
     "    conda_file=conda_file_path,\n",
     ")\n",
     "environment = ml_client.environments.create_or_update(environment)"
diff --git a/sdk/python/endpoints/online/managed/online-endpoints-openapi.ipynb b/sdk/python/endpoints/online/managed/online-endpoints-openapi.ipynb
index 0b03bcb455..f75a39d024 100644
--- a/sdk/python/endpoints/online/managed/online-endpoints-openapi.ipynb
+++ b/sdk/python/endpoints/online/managed/online-endpoints-openapi.ipynb
@@ -219,7 +219,7 @@
     "        code=\"openapi/code-decorated\", scoring_script=\"score.py\"\n",
     "    ),\n",
     "    environment=Environment(\n",
-    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu20.04-py38-cpu-inference\",\n",
+    "        image=\"mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cpu-inference\",\n",
     "        conda_file=\"openapi/env.yml\",\n",
     "    ),\n",
     "    instance_type=\"Standard_DS3_v2\",\n",