From ccc9f8dba6c1874f3ce76ea60404e6764d21f720 Mon Sep 17 00:00:00 2001 From: azul Date: Mon, 20 May 2024 14:21:14 -0700 Subject: [PATCH 1/6] feat: add tigemen docs (#3196) * feat: add timegen nbs * feat: add codeowner * feat: add prerequisites to all tutorials * feat: ci errors * fix: rm outputs --------- Co-authored-by: Kriti <53083330+fkriti@users.noreply.github.com> --- .github/CODEOWNERS | 1 + .../nixtla/01_quickstart_forecast.ipynb | 112 ++++ .../nixtla/02_finetuning.ipynb | 181 ++++++ .../nixtla/03_anomaly_detection.ipynb | 253 ++++++++ .../nixtla/04_exogenous_variables.ipynb | 444 +++++++++++++ .../nixtla/05_demand_forecasting.ipynb | 605 ++++++++++++++++++ 6 files changed, 1596 insertions(+) create mode 100644 sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb create mode 100644 sdk/python/foundation-models/nixtla/02_finetuning.ipynb create mode 100644 sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb create mode 100644 sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb create mode 100644 sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e5c192716c..3cac4cb9d9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -17,6 +17,7 @@ /sdk/python/foundation-models/cohere/cohere-aisearch-langchain-rag.ipynb @stewart-co @kseniia-cohere sdk/python/foundation-models/cohere/command_faiss_langchain.ipynb @stewart-co @kseniia-cohere sdk/python/foundation-models/cohere/command_tools-langchain.ipynb @stewart-co @kseniia-cohere +/sdk/python/foundation-models/nixtla/ @AzulGarza #### files referenced in docs (DO NOT EDIT, except for Docs team!!!) ############################################################################################# /cli/assets/component/train.yml @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1 diff --git a/sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb b/sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb new file mode 100644 index 0000000000..711531f5c2 --- /dev/null +++ b/sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb @@ -0,0 +1,112 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prerequisites\n", + "\n", + "Please make sure to follow these steps to start using TimeGEN: \n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n", + "* Create a project and resource group\n", + "* Select `TimeGEN-1`.\n", + "\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. 
To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n",
+ "\n",
+ "* Deploy with \"Pay-as-you-go\"\n",
+ "\n",
+ "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+ "\n",
+ "To complete this tutorial, you will need to:\n",
+ "\n",
+ "* Install `nixtla` and `pandas`:\n",
+ "\n",
+ " ```bash\n",
+ " pip install nixtla pandas\n",
+ " ```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Quickstart\n",
+ "\n",
+ "To forecast with TimeGEN, simply call the `forecast` method, making sure that you pass your DataFrame, and specify your target and time column names. Then you can plot the predictions using the `plot` method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from nixtla import NixtlaClient"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Instantiate the Nixtla Client\n",
+ "nixtla_client = NixtlaClient(\n",
+ "    base_url=\"your azure ai endpoint\",\n",
+ "    api_key=\"your api_key\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read the data\n",
+ "df = pd.read_csv(\n",
+ "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forecast\n",
+ "forecast_df = nixtla_client.forecast(\n",
+ "    df=df,\n",
+ "    h=12,\n",
+ "    time_col=\"timestamp\",\n",
+ "    target_col=\"value\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot predictions\n",
+ "nixtla_client.plot(\n",
+ "    df=df, forecasts_df=forecast_df, time_col=\"timestamp\", target_col=\"value\"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/sdk/python/foundation-models/nixtla/02_finetuning.ipynb b/sdk/python/foundation-models/nixtla/02_finetuning.ipynb
new file mode 100644
index 0000000000..264a54591e
--- /dev/null
+++ b/sdk/python/foundation-models/nixtla/02_finetuning.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a3e70828-d972-4231-aa21-89e5ede59366",
+ "metadata": {},
+ "source": [
+ "# Prerequisites\n",
+ "\n",
+ "Please make sure to follow these steps to start using TimeGEN: \n",
+ "\n",
+ "* Register for a valid Azure account with subscription \n",
+ "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n",
+ "* Create a project and resource group\n",
+ "* Select `TimeGEN-1`.\n",
+ "\n",
+ " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. In those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. 
To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n",
+ "\n",
+ "* Deploy with \"Pay-as-you-go\"\n",
+ "\n",
+ "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+ "\n",
+ "To complete this tutorial, you will need to:\n",
+ "\n",
+ "* Install `nixtla` and `pandas`:\n",
+ "\n",
+ " ```bash\n",
+ " pip install nixtla pandas\n",
+ " ```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da753996-54f8-4244-a34e-7316b0c01827",
+ "metadata": {},
+ "source": [
+ "# Fine-tuning"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "75a62889-d81e-462e-b235-c1eba1096da9",
+ "metadata": {},
+ "source": [
+ "Fine-tuning is a powerful process for utilizing TimeGEN more effectively. Foundation models such as TimeGEN are pre-trained on vast amounts of data, capturing wide-ranging features and patterns. These models can then be specialized for specific contexts or domains. With fine-tuning, the model's parameters are refined to forecast a new task, allowing it to tailor its vast pre-existing knowledge towards the requirements of the new data. Fine-tuning thus serves as a crucial bridge, linking TimeGEN's broad capabilities to your task's specificities.\n",
+ "\n",
+ "Concretely, the process of fine-tuning consists of performing a certain number of training iterations on your input data minimizing the forecasting error. The forecasts will then be produced with the updated model. To control the number of iterations, use the `finetune_steps` argument of the `forecast` method.\n",
+ "\n",
+ "To complete this tutorial, you will need to:\n",
+ "\n",
+ "* Install `nixtla` and `pandas`:\n",
+ "\n",
+ " ```bash\n",
+ " pip install nixtla pandas\n",
+ " ```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "10ec4f03",
+ "metadata": {},
+ "source": [
+ "## 1. Import packages\n",
+ "First, we import the required packages and initialize the Nixtla client."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "98942108-d427-42d6-81f8-fa0bb5859395",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from nixtla import NixtlaClient"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "64178d1c-957e-4a04-ab64-fde332b1840c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nixtla_client = NixtlaClient(\n",
+ "    base_url=\"your azure ai endpoint\",\n",
+ "    api_key=\"your api_key\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8c2e5387",
+ "metadata": {},
+ "source": [
+ "## 2. Load data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b78cc83e-7d34-4c37-906d-8c7ed1a977fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\n",
+ "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv\"\n",
+ ")\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09be4766",
+ "metadata": {},
+ "source": [
+ "## 3. 
Fine-tuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a683abc7-190c-40a6-a4e8-41a4c64bd773", + "metadata": {}, + "outputs": [], + "source": [ + "timegpt_fcst_finetune_df = nixtla_client.forecast(\n", + " df=df,\n", + " h=12,\n", + " finetune_steps=10,\n", + " time_col=\"timestamp\",\n", + " target_col=\"value\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "545ffdac-f166-417b-993f-78f51b0db6a1", + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client.plot(\n", + " df,\n", + " timegpt_fcst_finetune_df,\n", + " time_col=\"timestamp\",\n", + " target_col=\"value\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "62fc9cba-7c6e-4aef-9c68-e05d4fe8f7ba", + "metadata": {}, + "source": [ + "In this code, `finetune_steps=10` means the model will go through 10 iterations of training on your time series data.\n", + "\n", + "Keep in mind that fine-tuning can be a bit of trial and error. You might need to adjust the number of `finetune_steps` based on your specific needs and the complexity of your data. It's recommended to monitor the model's performance during fine-tuning and adjust as needed. Be aware that more `finetune_steps` may lead to longer training times and could potentially lead to overfitting if not managed properly. \n", + "\n", + "Remember, fine-tuning is a powerful feature, but it should be used thoughtfully and carefully." + ] + }, + { + "cell_type": "markdown", + "id": "8c546351", + "metadata": {}, + "source": [ + "For a detailed guide on using a specific loss function for fine-tuning, check out the [Fine-tuning with a specific loss function](https://docs.nixtla.io/docs/tutorials-fine_tuning_with_a_specific_loss_function) tutorial." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb b/sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb new file mode 100644 index 0000000000..24c39c0062 --- /dev/null +++ b/sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb @@ -0,0 +1,253 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prerequisites\n", + "\n", + "Please make sure to follow these steps to start using TimeGEN: \n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n", + "* Create a project and resource group\n", + "* Select `TimeGEN-1`.\n", + "\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. 
To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n",
+ "\n",
+ "* Deploy with \"Pay-as-you-go\"\n",
+ "\n",
+ "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+ "\n",
+ "To complete this tutorial, you will need to:\n",
+ "\n",
+ "* Install `nixtla` and `pandas`:\n",
+ "\n",
+ " ```bash\n",
+ " pip install nixtla pandas\n",
+ " ```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Anomaly detection\n",
+ "\n",
+ "Anomaly detection is the task of detecting abnormal points, points that deviate from the normal behaviour of the series. This is crucial in many applications, such as cybersecurity or equipment monitoring.\n",
+ "\n",
+ "In this tutorial, we explore in detail the anomaly detection capability of TimeGEN.\n",
+ "\n",
+ "To complete this tutorial, you will need to:\n",
+ "\n",
+ "* Install `nixtla` and `pandas`:\n",
+ "\n",
+ " ```bash\n",
+ " pip install nixtla pandas\n",
+ " ```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import packages\n",
+ "\n",
+ "First, we import the required packages for this tutorial and create an instance of `NixtlaClient`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from nixtla import NixtlaClient"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nixtla_client = NixtlaClient(\n",
+ "    base_url=\"your azure ai endpoint\",\n",
+ "    api_key=\"your api_key\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load dataset\n",
+ "\n",
+ "Now, let's load the dataset for this tutorial. We use the Peyton Manning dataset which tracks the visits to the Wikipedia page of Peyton Manning."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\n",
+ "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/peyton_manning.csv\"\n",
+ ")\n",
+ "\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nixtla_client.plot(\n",
+ "    df, time_col=\"timestamp\", target_col=\"value\", max_insample_length=365\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Anomaly detection\n",
+ "\n",
+ "We now perform anomaly detection. By default, TimeGEN uses a 99% confidence interval. If a point falls outside of that interval, it is considered to be an anomaly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "anomalies_df = nixtla_client.detect_anomalies(\n",
+ "    df,\n",
+ "    time_col=\"timestamp\",\n",
+ "    target_col=\"value\",\n",
+ "    freq=\"D\",\n",
+ ")\n",
+ "\n",
+ "anomalies_df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As you can see, 0 is assigned to \"normal\" values, as they fall inside the confidence interval. A label of 1 is then assigned to abnormal points.\n",
+ "\n",
+ "We can also plot the anomalies using `NixtlaClient`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nixtla_client.plot(df, anomalies_df, time_col=\"timestamp\", target_col=\"value\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Anomaly detection with exogenous features\n",
+ "\n",
+ "Previously, we performed anomaly detection without using any exogenous features. Now, it is possible to create features specifically for this scenario to inform the model in its task of anomaly detection.\n",
+ "\n",
+ "Here, we create date features that can be used by the model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is done using the `date_features` argument. We can set it to `True` and it will generate all possible features from the given dates and frequency of the data. Alternatively, we can specify a list of features that we want. In this case, we want only features at the *month* and *year* level."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "anomalies_df_x = nixtla_client.detect_anomalies(\n",
+ "    df,\n",
+ "    time_col=\"timestamp\",\n",
+ "    target_col=\"value\",\n",
+ "    freq=\"D\",\n",
+ "    date_features=[\"month\", \"year\"],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Then, we can plot the weights of each feature to understand its impact on anomaly detection."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nixtla_client.weights_x.plot.barh(x=\"features\", y=\"weights\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Modifying the confidence intervals\n",
+ "\n",
+ "We can tweak the confidence intervals using the `level` argument. This takes any value between 0 and 100, including decimal numbers.\n",
+ "\n",
+ "Reducing the confidence interval results in more anomalies being detected, while increasing it will reduce the number of anomalies.\n",
+ "\n",
+ "Here, for example, we reduce the interval to 70%, and we will notice more anomalies being plotted (red dots)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "anomalies_df = nixtla_client.detect_anomalies(\n", + " df,\n", + " time_col=\"timestamp\",\n", + " target_col=\"value\",\n", + " freq=\"D\",\n", + " level=70,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client.plot(df, anomalies_df, time_col=\"timestamp\", target_col=\"value\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb b/sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb new file mode 100644 index 0000000000..4573cebeac --- /dev/null +++ b/sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb @@ -0,0 +1,444 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "745cabf8-eadb-4cd9-98c4-41e13d6b791f", + "metadata": {}, + "source": [ + "# Prerequisites\n", + "\n", + "Please make sure to follow these steps to start using TimeGEN: \n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n", + "* Create a project and resource group\n", + "* Select `TimeGEN-1`.\n", + "\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n", + "\n", + "* Deploy with \"Pay-as-you-go\"\n", + "\n", + "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `nixtla` and `pandas`:\n", + "\n", + " ```bash\n", + " pip install nixtla pandas\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "id": "24c899f2-78c1-43b2-8347-3164e3549c3f", + "metadata": {}, + "source": [ + "# Exogenous variables" + ] + }, + { + "cell_type": "markdown", + "id": "a81fc39a-c6a0-485d-a3f3-c3a6298928a6", + "metadata": {}, + "source": [ + "Exogenous variables or external factors are crucial in time series forecasting as they provide additional information that might influence the prediction. These variables could include holiday markers, marketing spending, weather data, or any other external data that correlate with the time series data you are forecasting.\n", + "\n", + "For example, if you're forecasting ice cream sales, temperature data could serve as a useful exogenous variable. On hotter days, ice cream sales may increase.\n", + "\n", + "To incorporate exogenous variables in TimeGEN, you'll need to pair each point in your time series data with the corresponding external data.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `nixtla` and `pandas`:\n", + "\n", + " ```bash\n", + " pip install nixtla pandas\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "id": "bfa2ede9", + "metadata": {}, + "source": [ + "## 1. Import packages\n", + "First, we import the required packages and initialize the Nixtla client." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a84a0f65-e084-4e65-a0fb-d27c184dde44", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from nixtla import NixtlaClient" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "469d474a-c427-427c-a127-d140aeba0354", + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client = NixtlaClient(\n", + " base_url=\"you azure ai endpoint\",\n", + " api_key=\"your api_key\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "054c3cad", + "metadata": {}, + "source": [ + "## 2. Load data" + ] + }, + { + "cell_type": "markdown", + "id": "bc2bb3db-00e6-44e6-8dc3-a2e0eba7e295", + "metadata": {}, + "source": [ + "Let's see an example on predicting day-ahead electricity prices. The following dataset contains the hourly electricity price (`y` column) for five markets in Europe and US, identified by the `unique_id` column. The columns from `Exogenous1` to `day_6` are exogenous variables that TimeGEN will use to predict the prices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fec19dc-48dd-4337-8678-fe3753b5eb30", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\n", + " \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv\"\n", + ")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "b8f00038", + "metadata": {}, + "source": [ + "## 3. Forecasting electricity prices using exogenous variables" + ] + }, + { + "cell_type": "markdown", + "id": "eed41a2f-67ce-4812-8073-18b271b1592d", + "metadata": {}, + "source": [ + "To produce forecasts we also have to add the future values of the exogenous variables. Let's read this dataset. In this case, we want to predict 24 steps ahead, therefore each `unique_id` will have 24 observations.\n", + "\n", + "::: {.callout-important}\n", + "If you want to use exogenous variables when forecasting with TimeGEN, you need to have the future values of those exogenous variables too.\n", + "::: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95c03577-25f3-479f-a76e-fd5e4632da96", + "metadata": {}, + "outputs": [], + "source": [ + "future_ex_vars_df = pd.read_csv(\n", + " \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-future-ex-vars.csv\"\n", + ")\n", + "future_ex_vars_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "99f1e41d-e5bf-4d01-aa68-1a7a7fbb579b", + "metadata": {}, + "source": [ + "Let's call the `forecast` method, adding this information:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d252a0e0-f393-4957-8173-230972fc7a40", + "metadata": {}, + "outputs": [], + "source": [ + "fcst_ex_vars_df = nixtla_client.forecast(\n", + " df=df, X_df=future_ex_vars_df, h=24, level=[80, 90]\n", + ")\n", + "fcst_ex_vars_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18f36e5c-f41f-4888-b279-97558b71c1bf", + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client.plot(\n", + " df[[\"unique_id\", \"ds\", \"y\"]],\n", + " fcst_ex_vars_df,\n", + " max_insample_length=365,\n", + " level=[80, 90],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e945ad3c-63fd-4e51-9815-336306f60463", + "metadata": {}, + "source": [ + "We can also show the importance of the features." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef1c9df3-eff2-4984-a88f-00274b21b3cd", + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client.weights_x.plot.barh(x=\"features\", y=\"weights\")" + ] + }, + { + "cell_type": "markdown", + "id": "80c432bd", + "metadata": {}, + "source": [ + "This plot shows that `Exogenous1` and `Exogenous2` are the most important for this forecasting task, as they have the largest weight." + ] + }, + { + "cell_type": "markdown", + "id": "38cd05d6", + "metadata": {}, + "source": [ + "## 4. How to generate future exogenous variables?" + ] + }, + { + "cell_type": "markdown", + "id": "b4b6b0e4", + "metadata": {}, + "source": [ + "In the example above, we just loaded the future exogenous variables. Often, these are not available because these variables are unknown. Hence, we need to forecast these too. \n", + "\n", + "::: {.callout-important}\n", + "If you would only include historic exogenous variables in your model, you would be _implicitly_ making assumptions about the future of these exogenous variables in your forecast. That's why TimeGEN requires you to explicitly incorporate the future of these exogenous variables too, so that you make your assumptions about these variables _explicit_.\n", + "::: \n", + "\n", + "Below, we'll show you how we can also forecast `Exogenous1` and `Exogenous2` separately, so that you can generate the future exogenous variables in case they are not available." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce9a9bce", + "metadata": {}, + "outputs": [], + "source": [ + "# We read the data and create separate dataframes for the historic exogenous that we want to forecast separately.\n", + "df = pd.read_csv(\n", + " \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv\"\n", + ")\n", + "df_exog1 = df[[\"unique_id\", \"ds\", \"Exogenous1\"]]\n", + "df_exog2 = df[[\"unique_id\", \"ds\", \"Exogenous2\"]]" + ] + }, + { + "cell_type": "markdown", + "id": "2b5abf7f", + "metadata": {}, + "source": [ + "Next, we can use TimeGEN to forecast `Exogenous1` and `Exogenous2`. In this case, we assume these quantities can be separately forecast." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91eaa3f6", + "metadata": {}, + "outputs": [], + "source": [ + "fcst_ex1 = nixtla_client.forecast(df=df_exog1, h=24, target_col=\"Exogenous1\")\n", + "fcst_ex2 = nixtla_client.forecast(df=df_exog2, h=24, target_col=\"Exogenous2\")" + ] + }, + { + "cell_type": "markdown", + "id": "44b4b4fd", + "metadata": {}, + "source": [ + "We can now start creating `X_df`, which contains the future exogenous variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53cfe8e9", + "metadata": {}, + "outputs": [], + "source": [ + "fcst_ex1 = fcst_ex1.rename(columns={\"TimeGPT\": \"Exogenous1\"})\n", + "fcst_ex2 = fcst_ex2.rename(columns={\"TimeGPT\": \"Exogenous2\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f62cec4a", + "metadata": {}, + "outputs": [], + "source": [ + "X_df = fcst_ex1.merge(fcst_ex2)" + ] + }, + { + "cell_type": "markdown", + "id": "697fc8e4", + "metadata": {}, + "source": [ + "Next, we also need to add the `day_0` to `day_6` future exogenous variables. These are easy: this is just the weekday, which we can extract from the `ds` column." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "322e0197", + "metadata": {}, + "outputs": [], + "source": [ + "# We have 7 days, for each day a separate column denoting 1/0\n", + "for i in range(7):\n", + " X_df[f\"day_{i}\"] = 1 * (pd.to_datetime(X_df[\"ds\"]).dt.weekday == i)" + ] + }, + { + "cell_type": "markdown", + "id": "f8113866", + "metadata": {}, + "source": [ + "We have now created `X_df`, let's investigate it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38ef56b7", + "metadata": {}, + "outputs": [], + "source": [ + "X_df.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "ac313e02", + "metadata": {}, + "source": [ + "Let's compare it to our pre-loaded version:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2a0f524", + "metadata": {}, + "outputs": [], + "source": [ + "future_ex_vars_df.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "716d3d38", + "metadata": {}, + "source": [ + "As you can see, the values for `Exogenous1` and `Exogenous2` are slightly different, which makes sense because we've made a forecast of these values with TimeGEN." + ] + }, + { + "cell_type": "markdown", + "id": "f78ab3ff", + "metadata": {}, + "source": [ + "Let's create a new forecast of our electricity prices with TimeGEN using our new `X_df`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a104659d", + "metadata": {}, + "outputs": [], + "source": [ + "fcst_ex_vars_df_new = nixtla_client.forecast(df=df, X_df=X_df, h=24, level=[80, 90])\n", + "fcst_ex_vars_df_new.head()" + ] + }, + { + "cell_type": "markdown", + "id": "b2ba53a1", + "metadata": {}, + "source": [ + "Let's create a combined dataframe with the two forecasts and plot the values to compare the forecasts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62f20711", + "metadata": {}, + "outputs": [], + "source": [ + "fcst_ex_vars_df = fcst_ex_vars_df.rename(\n", + " columns={\"TimeGPT\": \"TimeGPT-provided_exogenous\"}\n", + ")\n", + "fcst_ex_vars_df_new = fcst_ex_vars_df_new.rename(\n", + " columns={\"TimeGPT\": \"TimeGPT-forecasted_exogenous\"}\n", + ")\n", + "\n", + "forecasts = fcst_ex_vars_df[[\"unique_id\", \"ds\", \"TimeGPT-provided_exogenous\"]].merge(\n", + " fcst_ex_vars_df_new[[\"unique_id\", \"ds\", \"TimeGPT-forecasted_exogenous\"]]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54fcf5cd", + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client.plot(\n", + " df[[\"unique_id\", \"ds\", \"y\"]],\n", + " forecasts,\n", + " max_insample_length=365,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "019c6510", + "metadata": {}, + "source": [ + "As you can see, we obtain a slightly different forecast if we use our forecasted exogenous variables. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb b/sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb new file mode 100644 index 0000000000..fad4738c6a --- /dev/null +++ b/sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb @@ -0,0 +1,605 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prerequisites\n", + "\n", + "Please make sure to follow these steps to start using TimeGEN: \n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n", + "* Create a project and resource group\n", + "* Select `TimeGEN-1`.\n", + "\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n", + "\n", + "* Deploy with \"Pay-as-you-go\"\n", + "\n", + "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `nixtla` and `pandas`:\n", + "\n", + " ```bash\n", + " pip install nixtla pandas\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Forecasting Demand" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial, we show how to use TimeGEN on an intermittent series where we have many values at zero. Here, we use a subset of the M5 dataset that tracks the demand for food items in a Californian store. The dataset also includes exogenous variables like the sell price and the type of event occuring at a particular day.\n", + "\n", + "TimeGEN achieves the best performance at a MAE of 0.49, which represents a **14% improvement** over the best statistical model specifically built to handle intermittent time series data.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `nixtla`, `pandas`, `numpy`, `utilsforecast`, `statsforecast`:\n", + "\n", + " ```bash\n", + " pip install nixtla pandas numpy utilsforecast statsforecast\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initial setup\n", + "\n", + "We start off by importing the required packages for this tutorial and create an instace of `NixtlaClient`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from nixtla import NixtlaClient\n", + "\n", + "from utilsforecast.losses import mae\n", + "from utilsforecast.evaluation import evaluate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client = NixtlaClient(\n", + " base_url=\"you azure ai endpoint\",\n", + " api_key=\"your api_key\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now read the dataset and plot it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\n", + " \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/m5_sales_exog_small.csv\"\n", + ")\n", + "df[\"ds\"] = pd.to_datetime(df[\"ds\"])\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client.plot(\n", + " df,\n", + " max_insample_length=365,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the figure above, we can see the intermittent nature of this dataset, with many periods with zero demand.\n", + "\n", + "Now, let's use TimeGEN to forecast the demand of each product." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bounded forecasts\n", + "\n", + "To avoid getting negative predictions coming from the model, we use a log transformation on the data. That way, the model will be forced to predict only positive values.\n", + "\n", + "Note that due to the presence of zeros in our dataset, we add one to all points before taking the log." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_transformed = df.copy()\n", + "\n", + "df_transformed[\"y\"] = np.log(df_transformed[\"y\"] + 1)\n", + "\n", + "df_transformed.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's keep the last 28 time steps for the test set and use the rest as input to the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df = df_transformed.groupby(\"unique_id\").tail(28)\n", + "\n", + "input_df = df_transformed.drop(test_df.index).reset_index(drop=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Forecasting with TimeGEN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "\n", + "fcst_df = nixtla_client.forecast(\n", + " df=input_df,\n", + " h=28,\n", + " level=[80], # Generate a 80% confidence interval\n", + " finetune_steps=10, # Specify the number of steps for fine-tuning\n", + " finetune_loss=\"mae\", # Use the MAE as the loss function for fine-tuning\n", + " time_col=\"ds\",\n", + " target_col=\"y\",\n", + " id_col=\"unique_id\",\n", + ")\n", + "\n", + "end = time.time()\n", + "\n", + "TimeGEN_duration = end - start\n", + "\n", + "print(f\"Time (TimeGEN): {TimeGEN_duration}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great! TimeGEN was done in **5.8 seconds** and we now have predictions. 
However, those predictions are transformed, so we need to inverse the transformation to get back to the orignal scale. Therefore, we take the exponential and subtract one from each data point." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [col for col in fcst_df.columns if col not in [\"ds\", \"unique_id\"]]\n", + "\n", + "for col in cols:\n", + " fcst_df[col] = np.exp(fcst_df[col]) - 1\n", + "\n", + "fcst_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before measuring the performance metric, let's plot the predictions against the actual values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nixtla_client.plot(\n", + " test_df, fcst_df, models=[\"TimeGPT\"], level=[80], time_col=\"ds\", target_col=\"y\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can measure the mean absolute error (MAE) of the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fcst_df[\"ds\"] = pd.to_datetime(fcst_df[\"ds\"])\n", + "\n", + "test_df = pd.merge(test_df, fcst_df, \"left\", [\"unique_id\", \"ds\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "evaluation = evaluate(\n", + " test_df, metrics=[mae], models=[\"TimeGPT\"], target_col=\"y\", id_col=\"unique_id\"\n", + ")\n", + "\n", + "average_metrics = evaluation.groupby(\"metric\")[\"TimeGPT\"].mean()\n", + "average_metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Forecasting with statistical models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The library `statsforecast` by Nixtla provides a suite of statistical models specifically built for intermittent forecasting, such as Croston, IMAPA and TSB. Let's use these models and see how they perform against TimeGEN." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from statsforecast import StatsForecast\n", + "from statsforecast.models import CrostonClassic, CrostonOptimized, IMAPA, TSB" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we use four models: two versions of Croston, IMAPA and TSB." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "models = [CrostonClassic(), CrostonOptimized(), IMAPA(), TSB(0.1, 0.1)]\n", + "\n", + "sf = StatsForecast(models=models, freq=\"D\", n_jobs=-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, we can fit the models on our data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "\n", + "sf.fit(df=input_df)\n", + "\n", + "sf_preds = sf.predict(h=28)\n", + "\n", + "end = time.time()\n", + "\n", + "sf_duration = end - start\n", + "\n", + "print(f\"Statistical models took :{sf_duration}s\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, fitting and predicting with four statistical models took 5.2 seconds, while TimeGEN took 5.8 seconds, so TimeGEN was only 0.6 seconds slower." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Again, we need to inverse the transformation. Remember that the training data was previously transformed using the log function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [col for col in sf_preds.columns if col not in [\"ds\", \"unique_id\"]]\n", + "\n", + "for col in cols:\n", + " sf_preds[col] = np.exp(sf_preds[col]) - 1\n", + "\n", + "sf_preds.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's combine the predictions from all methods and see which performs best." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df = pd.merge(test_df, sf_preds, \"left\", [\"unique_id\", \"ds\"])\n", + "test_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "evaluation = evaluate(\n", + " test_df,\n", + " metrics=[mae],\n", + " models=[\"TimeGPT\", \"CrostonClassic\", \"CrostonOptimized\", \"IMAPA\", \"TSB\"],\n", + " target_col=\"y\",\n", + " id_col=\"unique_id\",\n", + ")\n", + "\n", + "average_metrics = evaluation.groupby(\"metric\")[\n", + " [\"TimeGPT\", \"CrostonClassic\", \"CrostonOptimized\", \"IMAPA\", \"TSB\"]\n", + "].mean()\n", + "average_metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the table above, we can see that TimeGEN achieves the lowest MAE, achieving a 12.8% improvement over the best performing statistical model.\n", + "\n", + "Now, this was done without using any of the available exogenous features. While the statsitical models do not support them, let's try including them in TimeGEN." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Forecasting with exogenous variables using TimeGEN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To forecast with exogenous variables, we need to specify their future values over the forecast horizon. Therefore, let's simply take the types of events, as those dates are known in advance. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "futr_exog_df = test_df.drop(\n", + " [\n", + " \"TimeGPT\",\n", + " \"CrostonClassic\",\n", + " \"CrostonOptimized\",\n", + " \"IMAPA\",\n", + " \"TSB\",\n", + " \"y\",\n", + " \"TimeGPT-lo-80\",\n", + " \"TimeGPT-hi-80\",\n", + " \"sell_price\",\n", + " ],\n", + " axis=1,\n", + ")\n", + "futr_exog_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, we simply call the `forecast` method and pass the `futr_exog_df` in the `X_df` parameter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "\n", + "fcst_df = nixtla_client.forecast(\n", + " df=input_df,\n", + " X_df=futr_exog_df,\n", + " h=28,\n", + " level=[80], # Generate a 80% confidence interval\n", + " finetune_steps=10, # Specify the number of steps for fine-tuning\n", + " finetune_loss=\"mae\", # Use the MAE as the loss function for fine-tuning\n", + " time_col=\"ds\",\n", + " target_col=\"y\",\n", + " id_col=\"unique_id\",\n", + ")\n", + "\n", + "end = time.time()\n", + "\n", + "TimeGEN_duration = end - start\n", + "\n", + "print(f\"Time (TimeGEN): {TimeGEN_duration}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great! Remember that the predictions are transformed, so we have to inverse the transformation again." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fcst_df.rename(\n", + " columns={\n", + " \"TimeGPT\": \"TimeGPT_ex\",\n", + " },\n", + " inplace=True,\n", + ")\n", + "\n", + "cols = [col for col in fcst_df.columns if col not in [\"ds\", \"unique_id\"]]\n", + "\n", + "for col in cols:\n", + " fcst_df[col] = np.exp(fcst_df[col]) - 1\n", + "\n", + "fcst_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, let's evaluate the performance of TimeGEN with exogenous features." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df[\"TimeGPT_ex\"] = fcst_df[\"TimeGPT_ex\"].values\n", + "test_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "evaluation = evaluate(\n", + " test_df,\n", + " metrics=[mae],\n", + " models=[\n", + " \"TimeGPT\",\n", + " \"CrostonClassic\",\n", + " \"CrostonOptimized\",\n", + " \"IMAPA\",\n", + " \"TSB\",\n", + " \"TimeGPT_ex\",\n", + " ],\n", + " target_col=\"y\",\n", + " id_col=\"unique_id\",\n", + ")\n", + "\n", + "average_metrics = evaluation.groupby(\"metric\")[\n", + " [\"TimeGPT\", \"CrostonClassic\", \"CrostonOptimized\", \"IMAPA\", \"TSB\", \"TimeGPT_ex\"]\n", + "].mean()\n", + "average_metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From the table above, we can see that using exogenous features improved the performance of TimeGEN. Now, it represents a 14% improvement over the best statistical model. \n", + "\n", + "Using TimeGEN with exogenous features took 6.8 seconds. This is 1.6 seconds slower than statitstical models, but it resulted in much better predictions." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 38bcd7181faa33782db18f903c52d3ea9fc4f400 Mon Sep 17 00:00:00 2001 From: Rupal jain Date: Tue, 21 May 2024 08:55:52 +0530 Subject: [PATCH 2/6] [Vision & Multimodal] Example Updates (#3179) * adding continue_on_step_failure False and force_rerun True for vision pipelines * adding continue_on_step_failure False and force_rerun True for vision pipelines * adding continue_on_step_failure False and force_rerun True for vision pipelines * Model version updates * Model version updates --- .../multiclass-classification/readme.md | 8 ++++---- .../multilabel-classification/readme.md | 8 ++++---- .../finetune/image-instance-segmentation/readme.md | 2 +- .../finetune/image-object-detection/readme.md | 13 +++++++------ .../image-multiclass-classification.ipynb | 2 +- .../image-multilabel-classification.ipynb | 2 +- .../image-instance-segmentation.ipynb | 2 +- .../image-object-detection.ipynb | 2 +- ...rs-fridgeobjects-multiclass-classification.ipynb | 8 ++++---- ...rs-fridgeobjects-multilabel-classification.ipynb | 8 ++++---- ...ection-fridgeobjects-instance-segmentation.ipynb | 2 +- ...mmdetection-fridgeobjects-object-detection.ipynb | 12 ++++++------ .../mmtracking-video-multi-object-tracking.ipynb | 4 ++-- 13 files changed, 37 insertions(+), 36 deletions(-) diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/readme.md b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/readme.md index 629d1950c4..1f31de1cdf 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/readme.md +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/readme.md @@ -7,8 +7,8 @@ For using this component, run the shell script file `bash ./hftransformers-fridg Currently following models are supported: | Model Name | Source | | ------ | ---------- | -| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/11) | azureml registry | -| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/11) | azureml registry | -| [facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/10) | azureml registry | -| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/10) | azureml registry | +| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/19) | azureml registry | +| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/20) | azureml registry | +| [facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/19) | azureml registry | +| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/17) | azureml registry | | [Image classification models from Huggingface's Transformer library](https://huggingface.co/models?pipeline_tag=image-classification&library=transformers)| HuggingFace | diff --git 
a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/readme.md b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/readme.md index aea5acd50b..40234cebe1 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/readme.md +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/readme.md @@ -7,8 +7,8 @@ For using this component, run the shell script file `bash ./hftransformers-fridg Currently following models are supported: | Model Name | Source | | ------ | ---------- | -| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/11) | azureml registry | -| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/11) | azureml registry | -| [facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/10) | azureml registry | -| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/10) | azureml registry | +| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/19) | azureml registry | +| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/20) | azureml registry | +| [facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/19) | azureml registry | +| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/17) | azureml registry | | [Image classification models from Huggingface's Transformer library](https://huggingface.co/models?pipeline_tag=image-classification&library=transformers)| HuggingFace | diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/readme.md b/cli/foundation-models/system/finetune/image-instance-segmentation/readme.md index 8e2d8e3f51..240e3a4a97 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/readme.md +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/readme.md @@ -7,5 +7,5 @@ For using this component for instance segmentation, run the shell script file `b Currently following models are supported: | Model Name | Source | | :------------: | :-------: | -| [mask-rcnn_swin-t-p4-w7_fpn_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-mask-rcnn_swin-t-p4-w7_fpn_1x_coco/version/8) | azureml registry | +| [mmd-3x-mask-rcnn_swin-t-p4-w7_fpn_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-mask-rcnn_swin-t-p4-w7_fpn_1x_coco/version/14) | azureml registry | | [Image instance-segmentation models from MMDetection](https://github.com/open-mmlab/mmdetection/blob/v3.1.0/docs/en/model_zoo.md) | MMDetection | diff --git a/cli/foundation-models/system/finetune/image-object-detection/readme.md b/cli/foundation-models/system/finetune/image-object-detection/readme.md index c1a7ba04c1..6b95d60e08 100644 --- a/cli/foundation-models/system/finetune/image-object-detection/readme.md +++ b/cli/foundation-models/system/finetune/image-object-detection/readme.md @@ -5,12 +5,13 @@ You can launch a sample pipeline for image object detection using `mmdetection_i 
For using this component for object detection, run the shell script file `bash ./mmdetection-fridgeobjects-detection.sh`. Currently following models are supported: + | Model Name | Source | | :------------: | :-------: | -| [deformable-detr_refine_twostage_r50_16xb2-50e_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-deformable-detr_refine_twostage_r50_16xb2-50e_coco/version/8) | azureml registry | -| [sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco/version/8) | azureml registry | -| [sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco/version/8) | azureml registry | -| [vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco/version/8) | azureml registry | -| [vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco/version/8) | azureml registry | -| [yolof_r50_c5_8x8_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-yolof_r50_c5_8x8_1x_coco/version/8) | azureml registry | +| [mmd-3x-deformable-detr_refine_twostage_r50_16xb2-50e_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-deformable-detr_refine_twostage_r50_16xb2-50e_coco/version/12) | azureml registry | +| [mmd-3x-sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco/version/12) | azureml registry | +| [mmd-3x-sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco/version/12) | azureml registry | +| [mmd-3x-vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco/version/12) | azureml registry | +| [mmd-3x-vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco/version/12) | azureml registry | +| [mmd-3x-yolof_r50_c5_8x8_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-yolof_r50_c5_8x8_1x_coco/version/12) | azureml registry | | [Image object detection models from MMDetection](https://github.com/open-mmlab/mmdetection/blob/v3.1.0/docs/en/model_zoo.md) | MMDetection | diff --git a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb index bb269de9b9..69242997e5 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb @@ -220,7 +220,7 @@ "]\n", "for model in registry_models:\n", " all_models = registry_ml_client.models.list(model[\"name\"])\n", - " latest_model = max(all_models, key=lambda x: x.version)\n", + " latest_model = max(all_models, key=lambda x: int(x.version))\n", " print(latest_model.id)" ] }, diff --git 
a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb index c20d84faa6..eac3e07c23 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb @@ -221,7 +221,7 @@ "]\n", "for model in registry_models:\n", " all_models = registry_ml_client.models.list(model[\"name\"])\n", - " latest_model = max(all_models, key=lambda x: x.version)\n", + " latest_model = max(all_models, key=lambda x: int(x.version))\n", " print(latest_model.id)" ] }, diff --git a/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb b/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb index 98c14d563b..47b179bb0f 100644 --- a/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb @@ -219,7 +219,7 @@ "]\n", "for model in registry_models:\n", " all_models = registry_ml_client.models.list(model[\"name\"])\n", - " latest_model = max(all_models, key=lambda x: x.version)\n", + " latest_model = max(all_models, key=lambda x: int(x.version))\n", " print(latest_model.id)" ] }, diff --git a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb index f0ddccef36..796e749e7a 100644 --- a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb @@ -220,7 +220,7 @@ "]\n", "for model in registry_models:\n", " all_models = registry_ml_client.models.list(model[\"name\"])\n", - " latest_model = max(all_models, key=lambda x: x.version)\n", + " latest_model = max(all_models, key=lambda x: int(x.version))\n", " print(latest_model.id)" ] }, diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb index 47f160a590..d0e16cbfc5 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb @@ -235,10 +235,10 @@ "\n", "| Model Name | Source |\n", "| ------ | ---------- |\n", - "| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/11) | azureml registry |\n", - "| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/11) | azureml registry |\n", - "| 
[facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/10) | azureml registry |\n", - "| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/10) | azureml registry |\n", + "| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/19) | azureml registry |\n", + "| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/20) | azureml registry |\n", + "| [facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/19) | azureml registry |\n", + "| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/17) | azureml registry |\n", "| [Image classification models from Huggingface's Transformer library](https://huggingface.co/models?pipeline_tag=image-classification&library=transformers)| HuggingFace |" ] }, diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb index 2c2b104105..53c5c9cde5 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb @@ -236,10 +236,10 @@ "\n", "| Model Name | Source |\n", "| ------ | ---------- |\n", - "| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/11) | azureml registry |\n", - "| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/11) | azureml registry |\n", - "| [facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/10) | azureml registry |\n", - "| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/10) | azureml registry |\n", + "| [microsoft-beit-base-patch16-224-pt22k-ft22k](https://ml.azure.com/registries/azureml/models/microsoft-beit-base-patch16-224-pt22k-ft22k/version/19) | azureml registry |\n", + "| [microsoft-swinv2-base-patch4-window12-192-22k](https://ml.azure.com/registries/azureml/models/microsoft-swinv2-base-patch4-window12-192-22k/version/20) | azureml registry |\n", + "| [facebook-deit-base-patch16-224](https://ml.azure.com/registries/azureml/models/facebook-deit-base-patch16-224/version/19) | azureml registry |\n", + "| [google-vit-base-patch16-224](https://ml.azure.com/registries/azureml/models/google-vit-base-patch16-224/version/17) | azureml registry |\n", "| [Image classification models from Huggingface's Transformer library](https://huggingface.co/models?pipeline_tag=image-classification&library=transformers)| HuggingFace |" ] }, diff --git a/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb 
b/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb index e24c7e9d4a..e7f2825f2a 100644 --- a/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb @@ -253,7 +253,7 @@ "\n", "| Model Name | Source |\n", "| :------------: | :-------: |\n", - "| [mask-rcnn_swin-t-p4-w7_fpn_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-mask-rcnn_swin-t-p4-w7_fpn_1x_coco/version/8) | azureml registry |\n", + "| [mmd-3x-mask-rcnn_swin-t-p4-w7_fpn_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-mask-rcnn_swin-t-p4-w7_fpn_1x_coco/version/14) | azureml registry |\n", "| [Image instance-segmentation models from MMDetection](https://github.com/open-mmlab/mmdetection/blob/v3.1.0/docs/en/model_zoo.md) | MMDetection |" ] }, diff --git a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb index 94e21266a1..0aa27b280b 100644 --- a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb @@ -253,12 +253,12 @@ "\n", "| Model Name | Source |\n", "| :------------: | :-------: |\n", - "| [deformable-detr_refine_twostage_r50_16xb2-50e_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-deformable-detr_refine_twostage_r50_16xb2-50e_coco/version/8) | azureml registry |\n", - "| [sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco/version/8) | azureml registry |\n", - "| [sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco/version/8) | azureml registry |\n", - "| [vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco/version/8) | azureml registry |\n", - "| [vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco/version/8) | azureml registry |\n", - "| [yolof_r50_c5_8x8_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-yolof_r50_c5_8x8_1x_coco/version/8) | azureml registry |\n", + "| [mmd-3x-deformable-detr_refine_twostage_r50_16xb2-50e_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-deformable-detr_refine_twostage_r50_16xb2-50e_coco/version/12) | azureml registry |\n", + "| [mmd-3x-sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco/version/12) | azureml registry |\n", + "| [mmd-3x-sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco/version/12) | azureml registry |\n", + "| 
[mmd-3x-vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco/version/12) | azureml registry |\n", + "| [mmd-3x-vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco/version/12) | azureml registry |\n", + "| [mmd-3x-yolof_r50_c5_8x8_1x_coco](https://ml.azure.com/registries/azureml/models/mmd-3x-yolof_r50_c5_8x8_1x_coco/version/12) | azureml registry |\n", "| [Image object detection models from MMDetection](https://github.com/open-mmlab/mmdetection/blob/v3.1.0/docs/en/model_zoo.md) | MMDetection |" ] }, diff --git a/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb index e147fd6867..d48a53257f 100644 --- a/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb +++ b/sdk/python/foundation-models/system/finetune/video-multi-object-tracking/mmtracking-video-multi-object-tracking.ipynb @@ -201,8 +201,8 @@ "\n", "| Model Name | Source |\n", "| :------------: | :-------: |\n", - "| [bytetrack_yolox_x_crowdhuman-mot17_private-half](https://ml.azure.com/registries/azureml/models/bytetrack_yolox_x_crowdhuman_mot17-private-half/version/3) | azureml registry |\n", - "| [ocsort_yolox_x_crowdhuman_mot17-private-half](https://ml.azure.com/registries/azureml/models/ocsort_yolox_x_crowdhuman_mot17-private-half/version/3) | azureml registry |\n", + "| [bytetrack_yolox_x_crowdhuman-mot17_private-half](https://ml.azure.com/registries/azureml/models/bytetrack_yolox_x_crowdhuman_mot17-private-half/version/6) | azureml registry |\n", + "| [ocsort_yolox_x_crowdhuman_mot17-private-half](https://ml.azure.com/registries/azureml/models/ocsort_yolox_x_crowdhuman_mot17-private-half/version/6) | azureml registry |\n", "| [Variants of bytetrack models from MMTracking](https://github.com/open-mmlab/mmtracking/tree/v0.14.0/configs/mot/bytetrack) | MMTracking |" ] }, From 0833e2784a6fca477e1779cc3f8ed7173adbbbfa Mon Sep 17 00:00:00 2001 From: hazemelh <45972073+hazemelh@users.noreply.github.com> Date: Tue, 21 May 2024 07:09:00 -0700 Subject: [PATCH 3/6] Add JAIS MaaS Sample (#3187) * Create webrequests.ipynb * Create litellm.ipynb * Create openaisdk.ipynb * Update webrequests.ipynb * Update webrequests.ipynb Test with Llama change * Update webrequests.ipynb * Update webrequests.ipynb * Update webrequests.ipynb --- .../foundation-models/jais/litellm.ipynb | 161 ++++++++++++ .../foundation-models/jais/openaisdk.ipynb | 176 +++++++++++++ .../foundation-models/jais/webrequests.ipynb | 238 ++++++++++++++++++ 3 files changed, 575 insertions(+) create mode 100644 sdk/python/foundation-models/jais/litellm.ipynb create mode 100644 sdk/python/foundation-models/jais/openaisdk.ipynb create mode 100644 sdk/python/foundation-models/jais/webrequests.ipynb diff --git a/sdk/python/foundation-models/jais/litellm.ipynb b/sdk/python/foundation-models/jais/litellm.ipynb new file mode 100644 index 0000000000..59b7463e22 --- /dev/null +++ b/sdk/python/foundation-models/jais/litellm.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use litellm with JAIS in Azure AI and Azure ML\n", + "\n", + "Use `litellm` to consume JAIS deployments in Azure AI and Azure ML. 
Notice that JAIS in Azure only supports chat completions API.\n", + "\n", + "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for the JAIS 30b Chat model in AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "Before we start, there are certain steps we need to take to deploy the models:\n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n", + "* Create a project and resource group\n", + "* Select `Jais-30b-Chat` in the model catalog.\n", + "\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n", + "\n", + "* Deploy with \"Serverless APIs\"\n", + "\n", + "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", + "\n", + "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for model deployment and inference.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `litellm`:\n", + "\n", + " ```bash\n", + " pip install litellm\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example\n", + "\n", + "The following is an example about how to use `litellm` with a JAIS model deployed in Azure AI and Azure ML:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "imports" + }, + "outputs": [], + "source": [ + "import litellm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You will need to have a Endpoint url and Authentication Key associated with that endpoint. This can be acquired from previous steps. To work with `litellm`, configure the client as follows:\n", + "\n", + "- `base_url`: Use the endpoint URL from your deployment. Include the `/v1` in the URL.\n", + "- `api_key`: Use your API key." 
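As an alternative to pasting the endpoint URL and key directly into the next cell, you can keep them in environment variables and read them at runtime. This is only a sketch; the variable names below are placeholders, not anything the service requires:

```python
import os

import litellm

# Placeholder variable names: export them with the endpoint URL (including /v1)
# and the key that were assigned to your deployment.
client = litellm.LiteLLM(
    base_url=os.environ["AZUREAI_JAIS_ENDPOINT"],
    api_key=os.environ["AZUREAI_JAIS_KEY"],
)
```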
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_client" + }, + "outputs": [], + "source": [ + "client = litellm.LiteLLM(\n", + " base_url=\"https://..inference.ai.azure.com/v1\",\n", + " api_key=\"\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the client to create chat completions requests:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_invoke" + }, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " messages=[{\"content\": \"List the emirates of the UAE.\", \"role\": \"user\"}],\n", + " model=\"openai\",\n", + " custom_llm_provider=\"custom_openai\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The generated text can be accessed as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_response" + }, + "outputs": [], + "source": [ + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aditional resources\n", + "\n", + "Here are some additional reference: \n", + "\n", + "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio/how-to/costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sdk/python/foundation-models/jais/openaisdk.ipynb b/sdk/python/foundation-models/jais/openaisdk.ipynb new file mode 100644 index 0000000000..8ccaf3b8fd --- /dev/null +++ b/sdk/python/foundation-models/jais/openaisdk.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use OpenAI SDK with JAIS in Azure AI and Azure ML\n", + "\n", + "Use `openai` SDK to consume JAIS deployments in Azure AI and Azure ML. The JAIS models in Azure AI and Azure ML offers an API compatible with the OpenAI Chat Completion API. It allows customers and users to transition seamlessly from OpenAI models to JAIS LLMs. \n", + "\n", + "The API can be directly used with OpenAI's client libraries or third-party tools, like LangChain or LlamaIndex.\n", + "\n", + "The example below shows how to make this transition using the OpenAI Python Library. Notice that the Azure AI chat completions API supports only a portion of the parameters supported by OpenAI API.\n", + "\n", + "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for the JAIS models in AI Studio and ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "Before we start, there are certain steps we need to take to deploy the models:\n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n", + "* Create a project and resource group\n", + "* Select `Jais-30b-Chat` in the model catalog.\n", + "\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n", + "\n", + "* Deploy with \"Serverless APIs\"\n", + "\n", + "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", + "\n", + "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for model deployment and inference.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `openai`:\n", + "\n", + " ```bash\n", + " pip install openai\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example\n", + "\n", + "The following is an example about how to use `openai` with a JAIS model deployed in Azure AI and Azure ML:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "imports" + }, + "outputs": [], + "source": [ + "from openai import OpenAI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You will need to have a Endpoint url and Authentication Key associated with that endpoint. This can be acquired from previous steps. \n", + "To work with `openai`, configure the client as follows:\n", + "\n", + "- `base_url`: Use the endpoint URL from your deployment. Include `/v1` as part of the URL.\n", + "- `api_key`: Use your API key." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_client" + }, + "outputs": [], + "source": [ + "client = OpenAI(\n", + " base_url=\"https://..inference.ai.azure.com/v1\", api_key=\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the client to create chat completions requests:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_invoke" + }, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"List the emirates of the UAE.\",\n", + " }\n", + " ],\n", + " model=\"azureai\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The generated text can be accessed as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_response" + }, + "outputs": [], + "source": [ + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that not all the parameters supported by the OpenAI API is supported. See Azure AI documentation for the full list of supported arguments." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aditional resources\n", + "\n", + "Here are some additional reference: \n", + "\n", + "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio/how-to/costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sdk/python/foundation-models/jais/webrequests.ipynb b/sdk/python/foundation-models/jais/webrequests.ipynb new file mode 100644 index 0000000000..3c00278528 --- /dev/null +++ b/sdk/python/foundation-models/jais/webrequests.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use Azure API with JAIS\n", + "\n", + "This notebook shows examples of how to use JAIS APIs offered by Microsoft Azure AI and Azure ML. We will cover: \n", + "* HTTP requests API usage for Mistral pretrained and chat models in CLI\n", + "* HTTP requests API usage for Mistral pretrained and chat models in Python\n", + "\n", + "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for the JAIS model for AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisite\n", + "\n", + "Before we start, there are certain steps we need to take to deploy the models:\n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n", + "* Create a project and resource group\n", + "* Select `jais-30b-chat` from the model catalog.\n", + "\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. \n", + "\n", + "* Deploy with \"Serverless APIs\"\n", + "\n", + "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", + "\n", + "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for model deployment and inference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## HTTP Requests API Usage in CLI\n", + "\n", + "### Basics\n", + "\n", + "For using the REST API, You will need to have an Endpoint URL and Authentication Key associated with that endpoint. \n", + "This can be acquired from previous steps. \n", + "\n", + "In this chat completion example, we use a simple curl call for illustration. There are three major components: \n", + "\n", + "* The `host-url` is your endpoint url with chat completion schema `/v1/chat/completions`. \n", + "* The `headers` defines the content type as well as your api key. 
\n", + "* The `payload` or `data`, which is your prompt detail and model hyper parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!curl -X POST -L https://your-endpoint.inference.ai.azure.com/v1/chat/completions -H 'Content-Type: application/json' -H 'Authorization: your-auth-key' -d '{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"What is good about the UAE?\",\"role\":\"user\"}], \"max_tokens\": 500}'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming\n", + "\n", + "One fantastic feature the API offers is the streaming capability. Streaming allows the generated tokens to be sent as data-only server-sent events whenever they become available. This is extremely important for interactive applications such as chatbots, so the user is always engaged. \n", + "\n", + "To use streaming, simply set `\"stream\":\"True\"` as part of the request payload. \n", + "In the streaming mode, the REST API response will be different from non-streaming mode.\n", + "\n", + "Here is an example: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!curl -X POST -L https://your-endpoint.inference.ai.azure.com/v1/chat/completions -H 'Content-Type: application/json' -H 'Authorization: your-auth-key' -d '{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"What is good about Wuhan?\",\"role\":\"user\"}], \"max_tokens\": 500, \"stream\": \"True\"}'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see the result comes back as a stream of `data` objects, each contains generated information including a `choice`. \n", + "The stream terminated by a `data:[DONE]\\n\\n` message." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## HTTP Requests API Usage in Python\n", + "\n", + "Besides calling the API directly from command line tools, you can also programatically call them in Python. 
Here is a chat completion example:\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib.request\n", + "import json\n", + "\n", + "# Configure payload data sending to API endpoint\n", + "data = {\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"What is good about the UAE?\"},\n", + " ],\n", + " \"max_tokens\": 500,\n", + " \"temperature\": 0.9,\n", + "}\n", + "\n", + "body = str.encode(json.dumps(data))\n", + "\n", + "# Replace the url with your API endpoint\n", + "url = \"https://your-endpoint.inference.ai.azure.com/v1/chat/completions\"\n", + "\n", + "# Replace this with the key for the endpoint\n", + "api_key = \"your-auth-key\"\n", + "if not api_key:\n", + " raise Exception(\"API Key is missing\")\n", + "\n", + "headers = {\"Content-Type\": \"application/json\", \"Authorization\": (api_key)}\n", + "\n", + "req = urllib.request.Request(url, body, headers)\n", + "\n", + "try:\n", + " response = urllib.request.urlopen(req)\n", + " result = response.read()\n", + " print(result)\n", + "except urllib.error.HTTPError as error:\n", + " print(\"The request failed with status code: \" + str(error.code))\n", + " # Print the headers - they include the requert ID and the timestamp, which are useful for debugging the failure\n", + " print(error.info())\n", + " print(error.read().decode(\"utf8\", \"ignore\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However in this example, the content returns back as a single payload. It didn't stream as a serial of data events as we wished. To build true streaming capabilities utilizing the API endpoint, we will utilize [`requests`](https://requests.readthedocs.io/en/latest/) library instead." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming in Python\n", + "\n", + "`Requests` library is a simple HTTP library for Python built with [`urllib3`](https://github.com/urllib3/urllib3). It automatically maintains the keep-alive and HTTP connection pooling. With the `Session` class, we can easily stream the result from our API calls. 
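The quick example below simply prints each raw event line. If you also want to reassemble the generated text, a small helper along the following lines can be layered on top of `resp.iter_lines()`. It assumes each event body follows the OpenAI-style chat completion chunk schema, so verify it against the responses your endpoint actually returns:

```python
import json


def extract_text(sse_line: bytes) -> str:
    """Pull the content delta out of a single `data: {...}` event line.

    Returns an empty string for keep-alive lines and for the final
    `data: [DONE]` sentinel. Assumes OpenAI-style streaming chunks.
    """
    payload = sse_line.decode("utf-8").strip()
    if payload.startswith("data:"):
        payload = payload[len("data:"):].strip()
    if not payload or payload == "[DONE]":
        return ""
    chunk = json.loads(payload)
    choices = chunk.get("choices") or [{}]
    return (choices[0].get("delta") or {}).get("content") or ""
```

Concatenating the helper's output over all streamed lines yields the full assistant message.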
\n", + "\n", + "Here is a quick example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import requests\n", + "\n", + "data = {\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"What is good about the UAE?\"},\n", + " ],\n", + " \"max_tokens\": 500,\n", + " \"temperature\": 0.9,\n", + " \"stream\": \"True\",\n", + "}\n", + "\n", + "\n", + "def post_stream(url):\n", + " s = requests.Session()\n", + " api_key = \"your-auth-key\"\n", + " headers = {\"Content-Type\": \"application/json\", \"Authorization\": (api_key)}\n", + "\n", + " with s.post(url, data=json.dumps(data), headers=headers, stream=True) as resp:\n", + " print(resp.status_code)\n", + " for line in resp.iter_lines():\n", + " if line:\n", + " print(line)\n", + "\n", + "\n", + "url = \"https://your-endpoint.inference.ai.azure.com/v1/chat/completions\"\n", + "post_stream(url)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aditional resources\n", + "\n", + "Here are some additional reference: \n", + "\n", + "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio/how-to/costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From e1564539f158940d180113c64ad8875665a41aa0 Mon Sep 17 00:00:00 2001 From: shubhamiit <41925087+shubhamiit@users.noreply.github.com> Date: Tue, 21 May 2024 20:11:19 +0530 Subject: [PATCH 4/6] SDK, CLI examples for phi-3-mini (#3195) * Add sdk example for phi-3-min-v * Added CLI as well * Update SKU and model name * Update deploy.yml * Update sample_chat_completions_score.json * Update sample_chat_completions_score.json * Update online-endpoint-chat-completions-inference.ipynb * updates to examples * updates to examples * updates to examples * reviews * apply black formatting on the notebook * clean up imports * add sdk sample for aoai style inference --------- Co-authored-by: Rupal jain Co-authored-by: svaruag --- .../visual-chat-completion/deploy.yml | 11 + .../online-endpoint-deployment.sh | 79 ++++ .../sample_chat_completions_score.json | 22 ++ .../image-text-to-text-online-endpoint.ipynb | 358 ++++++++++++++++++ .../sample_chat_completions_score.json | 25 ++ 5 files changed, 495 insertions(+) create mode 100644 cli/foundation-models/system/inference/visual-chat-completion/deploy.yml create mode 100644 cli/foundation-models/system/inference/visual-chat-completion/online-endpoint-deployment.sh create mode 100644 cli/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json create mode 100644 sdk/python/foundation-models/system/inference/visual-chat-completion/image-text-to-text-online-endpoint.ipynb create mode 100644 sdk/python/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json diff --git a/cli/foundation-models/system/inference/visual-chat-completion/deploy.yml 
b/cli/foundation-models/system/inference/visual-chat-completion/deploy.yml new file mode 100644 index 0000000000..9cca346616 --- /dev/null +++ b/cli/foundation-models/system/inference/visual-chat-completion/deploy.yml @@ -0,0 +1,11 @@ +$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json +name: phi-3-vision +instance_type: Standard_NC48ads_A100_v4 +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 180000 diff --git a/cli/foundation-models/system/inference/visual-chat-completion/online-endpoint-deployment.sh b/cli/foundation-models/system/inference/visual-chat-completion/online-endpoint-deployment.sh new file mode 100644 index 0000000000..21794231af --- /dev/null +++ b/cli/foundation-models/system/inference/visual-chat-completion/online-endpoint-deployment.sh @@ -0,0 +1,79 @@ +set -x + +# script inputs +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +registry_name="azureml" +model_name="Phi-3-vision-128k-instruct" +deployment_sku="Standard_NC48ads_A100_v4" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. Check the model list and try again."; exit 1; +fi +version_temp=${model_list#*\"version\": \"} +model_version=${version_temp%%\"*} + +version=$(date +%s) +endpoint_name="phi-3-mini-v-instruct-$version" + +# scoring_file +scoring_file="./sample_chat_completions_score.json" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + ["$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +# 3. Deploy the model to an endpoint +# create online endpoint +az ml online-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ + instance_type=$deployment_sku || { + echo "deployment create failed"; exit 1; +} + +# 4. Try a sample scoring request + +# Check if scoring data file exists +if [ -f $scoring_file ]; then + echo "Invoking endpoint $endpoint_name with following input:\n\n" + cat $scoring_file + echo "\n\n" +else + echo "Scoring file $scoring_file does not exist" + exit 1 +fi + +az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { + echo "endpoint invoke failed"; exit 1; +} + +# 6. 
Delete the endpoint +az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + diff --git a/cli/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json b/cli/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json new file mode 100644 index 0000000000..a95097efe9 --- /dev/null +++ b/cli/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json @@ -0,0 +1,22 @@ +{ + "input_data": { + "input_string": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + }, + { + "type": "text", + "text": "What is shown in this image? Be extremely detailed and specific." + } + ] + } + ], + "parameters": { "temperature": 0.7, "max_new_tokens": 2048 } + } +} diff --git a/sdk/python/foundation-models/system/inference/visual-chat-completion/image-text-to-text-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/visual-chat-completion/image-text-to-text-online-endpoint.ipynb new file mode 100644 index 0000000000..81441ccc84 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/visual-chat-completion/image-text-to-text-online-endpoint.ipynb @@ -0,0 +1,358 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## (Visual) Chat Completion inference using Online Endpoints\n", + "\n", + "This sample shows how to deploy `Phi-3-mini-v-128k-instruct` to an online endpoint for inference.\n", + "\n", + "### Outline\n", + "* Set up pre-requisites\n", + "* Pick a model to deploy\n", + "* Download and prepare data for inference\n", + "* Deploy the model for real time inference\n", + "* Test the endpoint\n", + "* Test the endpoint using Azure OpenAI style payload\n", + "* Clean up resources" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Set up pre-requisites\n", + "* Install dependencies\n", + "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", + "* Connect to `azureml` system registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import (\n", + " DefaultAzureCredential,\n", + " InteractiveBrowserCredential,\n", + ")\n", + "\n", + "try:\n", + " credential = DefaultAzureCredential()\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " credential = InteractiveBrowserCredential()\n", + "\n", + "try:\n", + " workspace_ml_client = MLClient.from_config(credential)\n", + " subscription_id = workspace_ml_client.subscription_id\n", + " resource_group = workspace_ml_client.resource_group_name\n", + " workspace_name = workspace_ml_client.workspace_name\n", + "except Exception as ex:\n", + " print(ex)\n", + " # Enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace_name = \"\"\n", + "workspace_ml_client = MLClient(\n", + " credential, subscription_id, resource_group, workspace_name\n", + ")\n", + "\n", + "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml\"\n", + "registry_ml_client = MLClient(credential, registry_name=\"azureml\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Deploy the model to an online endpoint\n", + "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_name = \"Phi-3-vision-128k-instruct\"\n", + "\n", + "version_list = list(registry_ml_client.models.list(model_name))\n", + "if len(version_list) == 0:\n", + " print(\"Model not found in registry\")\n", + "else:\n", + " model_version = version_list[0].version\n", + " foundation_model = registry_ml_client.models.get(model_name, model_version)\n", + " print(\n", + " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", + " foundation_model.name, foundation_model.version, foundation_model.id\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", + "\n", + "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", + "timestamp = int(time.time())\n", + "online_endpoint_name = model_name[:13] + str(timestamp)\n", + "print(f\"Creating online endpoint with name: {online_endpoint_name}\")\n", + "\n", + "# create an online endpoint\n", + "endpoint = ManagedOnlineEndpoint(\n", + " name=online_endpoint_name,\n", + " description=f\"Online endpoint for {foundation_model.name}, for visual chat-completion task\",\n", + " auth_mode=\"key\",\n", + ")\n", + "workspace_ml_client.begin_create_or_update(endpoint).wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import OnlineRequestSettings, ProbeSettings\n", + "\n", + "# create a deployment\n", + "deployment_name = \"phi-3-vision\"\n", + "demo_deployment = ManagedOnlineDeployment(\n", + " name=deployment_name,\n", + " endpoint_name=online_endpoint_name,\n", + " model=foundation_model.id,\n", + " 
instance_type=\"Standard_NC48ads_A100_v4\",\n", + " instance_count=1,\n", + " request_settings=OnlineRequestSettings(\n", + " request_timeout_ms=180000,\n", + " max_queue_wait_ms=500,\n", + " ),\n", + " liveness_probe=ProbeSettings(\n", + " failure_threshold=49,\n", + " success_threshold=1,\n", + " timeout=299,\n", + " period=180,\n", + " initial_delay=180,\n", + " ),\n", + " readiness_probe=ProbeSettings(\n", + " failure_threshold=10,\n", + " success_threshold=1,\n", + " timeout=10,\n", + " period=10,\n", + " initial_delay=10,\n", + " ),\n", + ")\n", + "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", + "endpoint.traffic = {deployment_name: 100}\n", + "workspace_ml_client.begin_create_or_update(endpoint).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Test the endpoint with sample data\n", + "\n", + "We will send a sample request to the model, using the json that we create below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "\n", + "test_json = {\n", + " \"input_data\": {\n", + " \"input_string\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\n", + " \"url\": \"https://www.ilankelman.org/stopsigns/australia.jpg\"\n", + " },\n", + " },\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": \"What is shown in this image? Be extremely detailed and specific.\",\n", + " },\n", + " ],\n", + " },\n", + " ],\n", + " \"parameters\": {\"temperature\": 0.7, \"max_new_tokens\": 2048},\n", + " }\n", + "}\n", + "\n", + "# save the json object to a file\n", + "sample_score_file_path = os.path.join(\".\", \"sample_chat_completions_score.json\")\n", + "with open(sample_score_file_path, \"w\") as f:\n", + " json.dump(test_json, f, indent=4)\n", + "\n", + "print(\"Input payload:\\n\")\n", + "print(test_json)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# score the sample_chat_completions_score.json file using the online endpoint with the azureml endpoint invoke method\n", + "response = workspace_ml_client.online_endpoints.invoke(\n", + " endpoint_name=online_endpoint_name,\n", + " deployment_name=deployment_name,\n", + " request_file=sample_score_file_path,\n", + ")\n", + "print(\"Raw JSON Response: \\n\", response, \"\\n\")\n", + "\n", + "# Parse the JSON string\n", + "json_data = json.loads(response)\n", + "\n", + "# Convert the parsed JSON to a DataFrame\n", + "response_df = pd.DataFrame([json_data])\n", + "print(\"Generated Text:\\n\", response_df[\"output\"].iloc[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Test the endpoint using Azure OpenAI style payload\n", + "\n", + "We will send a sample request with Azure OpenAI Style payload to the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "aoai_test_json = {\n", + " \"model\": foundation_model.name,\n", + " \"messages\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\n", + " \"url\": \"https://www.ilankelman.org/stopsigns/australia.jpg\"\n", + " },\n", + " },\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": \"What is shown in this image? 
Be extremely detailed and specific.\",\n", + " },\n", + " ],\n", + " }\n", + " ],\n", + " \"temperature\": 0.7,\n", + " \"max_new_tokens\": 2048,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the scoring uri\n", + "scoring_uri = workspace_ml_client.online_endpoints.get(\n", + " name=online_endpoint_name\n", + ").scoring_uri\n", + "# Update the scoring uri to use for AOAI\n", + "aoai_format_scoring_uri = scoring_uri.replace(\"/score\", \"/v1/chat/completions\")\n", + "\n", + "# Get the key for data plane operation\n", + "data_plane_token = workspace_ml_client.online_endpoints.get_keys(\n", + " name=online_endpoint_name\n", + ").primary_key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib.request\n", + "import json\n", + "\n", + "# Prepare request\n", + "body = str.encode(json.dumps(aoai_test_json))\n", + "url = aoai_format_scoring_uri\n", + "api_key = data_plane_token\n", + "\n", + "headers = {\"Content-Type\": \"application/json\", \"Authorization\": (\"Bearer \" + api_key)}\n", + "req = urllib.request.Request(url, body, headers)\n", + "\n", + "# Send request & get response\n", + "try:\n", + " response = urllib.request.urlopen(req)\n", + " result = response.read().decode(\"utf-8\")\n", + " print(result)\n", + "except urllib.error.HTTPError as error:\n", + " print(\"The request failed with status code: \" + str(error.code))\n", + " # Print the headers - they include the requert ID and the timestamp, which are useful for debugging the failure\n", + " print(error.info())\n", + " print(error.read().decode(\"utf8\", \"ignore\"))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Delete the online endpoint\n", + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sdk/python/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json b/sdk/python/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json new file mode 100644 index 0000000000..e16bba5ecf --- /dev/null +++ b/sdk/python/foundation-models/system/inference/visual-chat-completion/sample_chat_completions_score.json @@ -0,0 +1,25 @@ +{ + "input_data": { + "input_string": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + }, + { + "type": "text", + "text": "What is shown in this image? Be extremely detailed and specific." 
+ } + ] + } + ], + "parameters": { + "temperature": 0.7, + "max_new_tokens": 2048 + } + } +} \ No newline at end of file From 815232546d58069a1a9fb43a643ecf340076c6f4 Mon Sep 17 00:00:00 2001 From: Facundo Santiago Date: Tue, 21 May 2024 10:43:16 -0400 Subject: [PATCH 5/6] MaaP/MaaS (#3202) * init * black * system --- .../foundation-models/phi-3/litellm.ipynb | 274 +++++++++++++++++ .../foundation-models/phi-3/openaisdk.ipynb | 279 ++++++++++++++++++ .../foundation-models/phi-3/webrequests.ipynb | 275 +++++++++++++++++ 3 files changed, 828 insertions(+) create mode 100644 sdk/python/foundation-models/phi-3/litellm.ipynb create mode 100644 sdk/python/foundation-models/phi-3/openaisdk.ipynb create mode 100644 sdk/python/foundation-models/phi-3/webrequests.ipynb diff --git a/sdk/python/foundation-models/phi-3/litellm.ipynb b/sdk/python/foundation-models/phi-3/litellm.ipynb new file mode 100644 index 0000000000..0b53b16c77 --- /dev/null +++ b/sdk/python/foundation-models/phi-3/litellm.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use litellm with Phi-3 in Azure AI and Azure ML\n", + "\n", + "Use `litellm` to consume Phi-3 deployments in Azure AI and Azure ML. Notice that Phi-3 supports only chat completions API.\n", + "\n", + "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi3) for the Phi-3 family of models at for AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "Before we start, there are certain steps we need to take to deploy the models:\n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home) or Azure Machine Learning.\n", + "* Create a project or workspace.\n", + "* Go to the Model Catalog and search for any of the models of the Phi-3 family. This example shows a `phi-3-mini-128k-instruct` (chat completions) and a `phi-3-vision-128k-instruct` (chat completions with vision).\n", + "* Deploy it using either \"Serverless API endpoints\" or \"Self-hosted Online Endpoints\".\n", + "\n", + " > Notice that `phi-3-vision-128k-instruct` can only be deployed to Self-hosted Online Endpoints. You need to ensure you have enough quota in yur subscription to use this model. 
You can always use our temporary quota access to have an endpoint working for 7 days.\n", + "\n", + "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", + "\n", + "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi-3) for model deployment and inference.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `litellm`:\n", + "\n", + " ```bash\n", + " pip install litellm\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example\n", + "\n", + "The following is an example about how to use `litellm` with a Phi-3 model deployed in Azure AI and Azure ML:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "imports" + }, + "outputs": [], + "source": [ + "import litellm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You will need to have a Endpoint url and Authentication Key associated with that endpoint. This can be acquired from previous steps. To work with `litellm`, configure the client as follows:\n", + "\n", + "- `base_url`: Use the endpoint URL from your deployment. Include the `/v1` in the URL.\n", + "- `api_key`: Use your API key." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_client" + }, + "outputs": [], + "source": [ + "client = litellm.LiteLLM(\n", + " base_url=\"/v1\",\n", + " api_key=\"\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the client to create chat completions requests:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_invoke" + }, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " messages=[{\"content\": \"Who is the most renowned French painter?\", \"role\": \"user\"}],\n", + " model=\"azureai\",\n", + " custom_llm_provider=\"custom_openai\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The generated text can be accessed as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_response" + }, + "outputs": [], + "source": [ + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limitations:\n", + "- Only `user` and `assistant` roles are supported by Phi-3. Using the role `system` will get the model to ignore those messages.\n", + "- Tools is not supported.\n", + "- Output type `json_object` is not supported. You can still prompt the model to generate JSON responses, but you have to validate that the response is a valid JSON object." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Phi-3 Vision\n", + "\n", + "Phi-3 Vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version comes with 128K context length (in tokens) it can support. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create a client to connect to the model. Use the endpoint URL and append `/v1` to it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = litellm.LiteLLM(\n", + " base_url=\"/v1\",\n", + " api_key=\"\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from urllib.request import urlopen, Request\n", + "import os\n", + "import base64\n", + "\n", + "image_url = \"https://news.microsoft.com/source/wp-content/uploads/2024/04/The-Phi-3-small-language-models-with-big-potential-1-1900x1069.jpg\"\n", + "image_format = \"jpeg\"\n", + "\n", + "request = Request(image_url, headers={\"User-Agent\": \"Mozilla/5.0\"})\n", + "image_data = base64.b64encode(urlopen(request).read()).decode(\"utf-8\")\n", + "data_url = f\"data:image/{image_format};base64,{image_data}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can visualize the image:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import IPython.display as Disp\n", + "\n", + "Disp.Image(requests.get(image_url).content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what Phi-3 can analyze about it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": \"Which conclusion can be extracted from the following chart?\",\n", + " },\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\n", + " \"url\": data_url,\n", + " },\n", + " },\n", + " ],\n", + " }\n", + " ],\n", + " model=\"azureai\",\n", + " custom_llm_provider=\"custom_openai\",\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aditional resources\n", + "\n", + "Here are some additional reference: \n", + "\n", + "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio/how-to/costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)\n", + "* [Phi-3 family of models](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sdk/python/foundation-models/phi-3/openaisdk.ipynb b/sdk/python/foundation-models/phi-3/openaisdk.ipynb new file mode 100644 index 0000000000..5e21947702 --- /dev/null +++ b/sdk/python/foundation-models/phi-3/openaisdk.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use OpenAI SDK with Phi-3 in Azure AI and Azure ML\n", + "\n", + "Use `openai` SDK to consume Phi-3 deployments in Azure AI and Azure ML. 
The Phi-3 family of models in Azure AI and Azure ML offers an API compatible with the OpenAI Chat Completion API. It allows customers and users to transition seamlessly from OpenAI models to Phi-3 LLMs. \n", + "\n", + "The API can be directly used with OpenAI's client libraries or third-party tools, like LangChain or LlamaIndex.\n", + "\n", + "The example below shows how to make this transition using the OpenAI Python Library. Notice that Phi-3 supports only chat completions API.\n", + "\n", + "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi3) for the Phi-3 family of models at for AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "Before we start, there are certain steps we need to take to deploy the models:\n", + "\n", + "* Register for a valid Azure account with subscription \n", + "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home) or Azure Machine Learning.\n", + "* Create a project or workspace.\n", + "* Go to the Model Catalog and search for any of the models of the Phi-3 family. This example shows a `phi-3-mini-128k-instruct` (chat completions) and a `phi-3-vision-128k-instruct` (chat completions with vision).\n", + "* Deploy it using either \"Serverless API endpoints\" or \"Self-hosted Online Endpoints\".\n", + "\n", + " > Notice that `phi-3-vision-128k-instruct` can only be deployed to Self-hosted Online Endpoints. You need to ensure you have enough quota in yur subscription to use this model. You can always use our temporary quota access to have an endpoint working for 7 days.\n", + "\n", + "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", + "\n", + "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi-3) for model deployment and inference.\n", + "\n", + "To complete this tutorial, you will need to:\n", + "\n", + "* Install `openai`:\n", + "\n", + " ```bash\n", + " pip install openai\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example\n", + "\n", + "The Phi-3-Mini-128K-Instruct is a 3.8 billion-parameter, lightweight, state-of-the-art open model trained using the Phi-3 datasets. This dataset includes both synthetic data and filtered publicly available website data, with an emphasis on high-quality and reasoning-dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) that it can support.\n", + "\n", + "The following is an example about how to use `openai` with a `phi-3-mini-128k-instruct` model deployed in Azure AI and Azure ML. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "imports" + }, + "outputs": [], + "source": [ + "from openai import OpenAI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You will need to have a Endpoint url and Authentication Key associated with that endpoint. This can be acquired from previous steps. \n", + "To work with `openai`, configure the client as follows:\n", + "\n", + "- `base_url`: Use the endpoint URL from your deployment. 
Include `/v1` as part of the URL.\n", + "- `api_key`: Use your API key." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_client" + }, + "outputs": [], + "source": [ + "client = OpenAI(base_url=\"/v1\", api_key=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the client to create chat completions requests:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_invoke" + }, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Who is the most renowned French painter? Provide a short answer.\",\n", + " }\n", + " ],\n", + " model=\"azureai\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The generated text can be accessed as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "chat_response" + }, + "outputs": [], + "source": [ + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Limitations:\n", + "- Only `user` and `assistant` roles are supported by Phi-3. Using the role `system` will get the model to ignore those messages.\n", + "- Tools is not supported.\n", + "- Output type `json_object` is not supported. You can still prompt the model to generate JSON responses, but you have to validate that the response is a valid JSON object." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Phi-3 Vision\n", + "\n", + "Phi-3 Vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version comes with 128K context length (in tokens) it can support. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create a client to connect to the model. 
Use the endpoint URL and append `/v1` to it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = OpenAI(base_url=\"/v1\", api_key=\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from urllib.request import urlopen, Request\n", + "import os\n", + "import base64\n", + "\n", + "image_url = \"https://news.microsoft.com/source/wp-content/uploads/2024/04/The-Phi-3-small-language-models-with-big-potential-1-1900x1069.jpg\"\n", + "image_format = \"jpeg\"\n", + "\n", + "request = Request(image_url, headers={\"User-Agent\": \"Mozilla/5.0\"})\n", + "image_data = base64.b64encode(urlopen(request).read()).decode(\"utf-8\")\n", + "data_url = f\"data:image/{image_format};base64,{image_data}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can visualize the image:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import IPython.display as Disp\n", + "\n", + "Disp.Image(requests.get(image_url).content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what Phi-3 can analyze about it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": \"Which conclusion can be extracted from the following chart?\",\n", + " },\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\n", + " \"url\": data_url,\n", + " },\n", + " },\n", + " ],\n", + " }\n", + " ],\n", + " model=\"azureai\",\n", + " max_tokens=2048,\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aditional resources\n", + "\n", + "Here are some additional reference: \n", + "\n", + "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio/how-to/costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)\n", + "* [Phi-3 family of models](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "azureml_py38_PT_TF", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sdk/python/foundation-models/phi-3/webrequests.ipynb b/sdk/python/foundation-models/phi-3/webrequests.ipynb new file mode 100644 index 0000000000..782dc3fd99 --- /dev/null +++ b/sdk/python/foundation-models/phi-3/webrequests.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use Azure API with Phi-3\n", + "\n", + "This notebook shows examples of how to use Phi-3 APIs offered by Microsoft Azure AI and Azure ML. 
We will cover: \n", "* HTTP requests API usage for Phi-3 pretrained and chat models in CLI\n", "* HTTP requests API usage for Phi-3 pretrained and chat models in Python\n", "\n", "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-Phi-3) for the Phi-3 family of models in AI Studio and in ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "## Prerequisites\n", "\n", "Before we start, there are certain steps we need to take to deploy the models:\n", "\n", "* Register for a valid Azure account with subscription \n", "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home) or Azure Machine Learning.\n", "* Create a project or workspace.\n", "* Go to the Model Catalog and search for any of the models of the Phi-3 family. This example shows a `phi-3-mini-128k-instruct` (chat completions) and a `phi-3-vision-128k-instruct` (chat completions with vision).\n", "* Deploy it using either \"Serverless API endpoints\" or \"Self-hosted Online Endpoints\".\n", "\n", " > Notice that `phi-3-vision-128k-instruct` can only be deployed to Self-hosted Online Endpoints. You need to ensure you have enough quota in your subscription to use this model. You can always use our temporary quota access to have an endpoint working for 7 days.\n", "\n", "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n", "\n", "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi-3) for model deployment and inference." ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "## HTTP Requests API Usage in CLI\n", "\n", "### Basics\n", "\n", "To use the REST API, you will need an endpoint URL and an authentication key associated with that endpoint. \n", "These can be acquired from the previous steps. \n", "\n", "In this chat completion example, we use a simple curl call for illustration. There are three major components: \n", "\n", "* The `host-url` is your endpoint URL with the chat completion schema `/v1/chat/completions`. \n", "* The `headers` defines the content type as well as your API key. \n", "* The `payload` or `data`, which contains your prompt details and model hyperparameters." ] }, {
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!curl -X POST -L https://your-endpoint.inference.ai.azure.com/v1/chat/completions -H 'Content-Type: application/json' -H 'Authorization: your-auth-key' -d '{\"messages\":[{\"content\":\"You are a helpful assistant. What is good about Wuhan?\",\"role\":\"user\"}], \"max_tokens\": 50}'" ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "Limitations:\n", "- Only `user` and `assistant` roles are supported by Phi-3. Using the role `system` will cause the model to ignore those messages.\n", "- Tools are not supported.\n", "- Output type `json_object` is not supported. You can still prompt the model to generate JSON responses, but you have to validate yourself that the response is a valid JSON object, as sketched below."
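Following up on the `json_object` limitation above, the sketch below shows one way you might validate a JSON-style reply yourself. It assumes you have already pulled the model's reply text out of the chat completion response (for example, from `choices[0].message.content` in the OpenAI-compatible payload) into a plain string; the sample string here is purely illustrative:

```python
import json


def try_parse_json(reply_text):
    """Return the parsed object if the reply is valid JSON, otherwise None."""
    try:
        return json.loads(reply_text)
    except json.JSONDecodeError:
        return None


# Illustrative reply text; in practice this would come from the API response.
reply_text = '{"city": "Wuhan", "highlights": ["food", "lakes", "universities"]}'

parsed = try_parse_json(reply_text)
if parsed is None:
    print("The model did not return valid JSON; consider re-prompting or retrying.")
else:
    print(parsed["highlights"])
```

If the reply is not valid JSON, re-prompting the model (or trimming surrounding prose from the reply) is usually enough.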
+ ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "### Streaming\n", "\n", "One fantastic feature the API offers is streaming. Streaming allows the generated tokens to be sent as data-only server-sent events whenever they become available. This is extremely important for interactive applications such as chatbots, so the user is always engaged. \n", "\n", "To use streaming, simply set `\"stream\":\"True\"` as part of the request payload. \n", "In streaming mode, the REST API response will be different from the non-streaming mode.\n", "\n", "Here is an example: " ] }, {
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!curl -X POST -L https://your-endpoint.inference.ai.azure.com/v1/chat/completions -H 'Content-Type: application/json' -H 'Authorization: your-auth-key' -d '{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"What is good about Wuhan?\",\"role\":\"user\"}], \"max_tokens\": 500, \"stream\": \"True\"}'" ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "As you can see, the result comes back as a stream of `data` objects, each of which contains generated information including a `choice`. \n", "The stream is terminated by a `data:[DONE]\n\n` message." ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "### Content Safety Filtering\n", "\n", "All Azure Phi-3 API endpoints have the content safety feature turned on. Both the input prompt and the output tokens are filtered by this service automatically. \n", "To learn more about its impact on the request/response payload, please refer to the official guide [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/content-filter?tabs=python). \n", "\n", "For model input and output, if the filter detects harmful content, the generation will error out with a response payload containing the reasoning, along with the type of content violation and its severity. \n", "\n", "Here is an example prompt that triggers content safety filtering:\n" ] }, {
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!curl -X POST -L https://your-endpoint.inference.ai.azure.com/v1/chat/completions -H 'Content-Type: application/json' -H 'Authorization: your-auth-key' -d '{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"How to make bomb?\",\"role\":\"user\"}], \"max_tokens\": 50}'" ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "## HTTP Requests API Usage in Python\n", "\n", "Besides calling the API directly from command-line tools, you can also call it programmatically in Python. Here is a chat completion example:\n", "\n", "\n" ] }, {
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import urllib.request\n", "import json\n", "\n", "# Configure the payload data sent to the API endpoint\n", "data = {\n", "    \"messages\": [\n", "        {\n", "            \"role\": \"user\",\n", "            \"content\": \"You are a helpful assistant. What is good about Wuhan?\",\n", "        },\n", "    ],\n", "    \"max_tokens\": 500,\n", "    \"temperature\": 0.9,\n", "    \"stream\": \"True\",\n", "}\n", "\n", "body = str.encode(json.dumps(data))\n", "\n", "# Replace the url with your API endpoint\n", "url = \"https://your-endpoint.inference.ai.azure.com/v1/chat/completions\"\n", "\n", "# Replace this with the key for the endpoint\n", "api_key = \"your-auth-key\"\n", "if not api_key:\n", "    raise Exception(\"API Key is missing\")\n", "\n", "headers = {\"Content-Type\": \"application/json\", \"Authorization\": (api_key)}\n", "\n", "req = urllib.request.Request(url, body, headers)\n", "\n", "try:\n", "    response = urllib.request.urlopen(req)\n", "    result = response.read()\n", "    print(result)\n", "except urllib.error.HTTPError as error:\n", "    print(\"The request failed with status code: \" + str(error.code))\n", "    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure\n", "    print(error.info())\n", "    print(error.read().decode(\"utf8\", \"ignore\"))" ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "However, in this example the streamed content comes back as a single payload rather than as a series of data events, as we would like. To build true streaming with the API endpoint, we will use the [`requests`](https://requests.readthedocs.io/en/latest/) library instead." ] }, {
"cell_type": "markdown", "metadata": {}, "source": [ "### Streaming in Python\n", "\n", "The `requests` library is a simple HTTP library for Python built on [`urllib3`](https://github.com/urllib3/urllib3). It automatically maintains keep-alive and HTTP connection pooling. With its `Session` class, we can easily stream the results of our API calls. \n", "\n", "Here is a quick example:" ] }, {
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import json\n", "import requests\n", "\n", "data = {\n", "    \"messages\": [\n", "        {\n", "            \"role\": \"user\",\n", "            \"content\": \"You are a helpful assistant. 
What is good about Wuhan?\",\n", + " },\n", + " ],\n", + " \"max_tokens\": 500,\n", + " \"temperature\": 0.9,\n", + " \"stream\": \"True\",\n", + "}\n", + "\n", + "\n", + "def post_stream(url):\n", + " s = requests.Session()\n", + " api_key = \"your-auth-key\"\n", + " headers = {\"Content-Type\": \"application/json\", \"Authorization\": (api_key)}\n", + "\n", + " with s.post(url, data=json.dumps(data), headers=headers, stream=True) as resp:\n", + " print(resp.status_code)\n", + " for line in resp.iter_lines():\n", + " if line:\n", + " print(line)\n", + "\n", + "\n", + "url = \"https://your-endpoint.inference.ai.azure.com/v1/chat/completions\"\n", + "post_stream(url)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aditional resources\n", + "\n", + "Here are some additional reference: \n", + "\n", + "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio/how-to/costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 7f824f000a3a96c06ae4dda31d7544507a97c27a Mon Sep 17 00:00:00 2001 From: hazemelh <45972073+hazemelh@users.noreply.github.com> Date: Tue, 21 May 2024 09:54:06 -0700 Subject: [PATCH 6/6] Update webrequests.ipynb (#3208) * Update webrequests.ipynb * Update litellm.ipynb * Update openaisdk.ipynb --- sdk/python/foundation-models/jais/litellm.ipynb | 8 ++++---- sdk/python/foundation-models/jais/openaisdk.ipynb | 4 ++-- sdk/python/foundation-models/jais/webrequests.ipynb | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk/python/foundation-models/jais/litellm.ipynb b/sdk/python/foundation-models/jais/litellm.ipynb index 59b7463e22..76101db1db 100644 --- a/sdk/python/foundation-models/jais/litellm.ipynb +++ b/sdk/python/foundation-models/jais/litellm.ipynb @@ -8,7 +8,7 @@ "\n", "Use `litellm` to consume JAIS deployments in Azure AI and Azure ML. Notice that JAIS in Azure only supports chat completions API.\n", "\n", - "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for the JAIS 30b Chat model in AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." + "> Review the [documentation](https://aka.ms/jais-azure-ai-studio-docs) for the JAIS 30b Chat model in AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." ] }, { @@ -24,13 +24,13 @@ "* Create a project and resource group\n", "* Select `Jais-30b-Chat` in the model catalog.\n", "\n", - " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. 
To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections)\n", + " > Notice that some models may not be available in all the regions in Azure AI and Azure Machine Learning. On those cases, you can create a workspace or project in the region where the models are available and then consume it with a connection from a different one. To learn more about using connections see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio//deployments-connections)\n", "\n", "* Deploy with \"Serverless APIs\"\n", "\n", "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", "\n", - "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for model deployment and inference.\n", + "For more information, you should consult Azure's official documentation [here](https://aka.ms/jais-azure-ai-studio-docs) for model deployment and inference.\n", "\n", "To complete this tutorial, you will need to:\n", "\n", @@ -133,7 +133,7 @@ "\n", "Here are some additional reference: \n", "\n", - "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio/how-to/costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)" + "* [Plan and manage costs (marketplace)](https://learn.microsoft.com/azure/ai-studio//costs-plan-manage#monitor-costs-for-models-offered-through-the-azure-marketplace)" ] } ], diff --git a/sdk/python/foundation-models/jais/openaisdk.ipynb b/sdk/python/foundation-models/jais/openaisdk.ipynb index 8ccaf3b8fd..4ee0cede92 100644 --- a/sdk/python/foundation-models/jais/openaisdk.ipynb +++ b/sdk/python/foundation-models/jais/openaisdk.ipynb @@ -12,7 +12,7 @@ "\n", "The example below shows how to make this transition using the OpenAI Python Library. Notice that the Azure AI chat completions API supports only a portion of the parameters supported by OpenAI API.\n", "\n", - "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for the JAIS models in AI Studio and ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." + "> Review the [documentation](https://aka.ms/jais-azure-ai-studio-docs) for the JAIS models in AI Studio and ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." 
] }, { @@ -34,7 +34,7 @@ "\n", "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", "\n", - "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for model deployment and inference.\n", + "For more information, you should consult Azure's official documentation [here](https://aka.ms/jais-azure-ai-studio-docs) for model deployment and inference.\n", "\n", "To complete this tutorial, you will need to:\n", "\n", diff --git a/sdk/python/foundation-models/jais/webrequests.ipynb b/sdk/python/foundation-models/jais/webrequests.ipynb index 3c00278528..30d6406283 100644 --- a/sdk/python/foundation-models/jais/webrequests.ipynb +++ b/sdk/python/foundation-models/jais/webrequests.ipynb @@ -10,7 +10,7 @@ "* HTTP requests API usage for Mistral pretrained and chat models in CLI\n", "* HTTP requests API usage for Mistral pretrained and chat models in Python\n", "\n", - "> Review the [documentation](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for the JAIS model for AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." + "> Review the [documentation](https://aka.ms/jais-azure-ai-studio-docs) for the JAIS model for AI Studio and for ML Studio for details on how to provision inference endpoints, regional availability, pricing and inference schema reference." ] }, { @@ -32,7 +32,7 @@ "\n", "Once deployed successfully, you should be assigned for an API endpoint and a security key for inference.\n", "\n", - "For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-jais) for model deployment and inference." + "For more information, you should consult Azure's official documentation [here](https://aka.ms/jais-azure-ai-studio-docs) for model deployment and inference." ] }, {