diff --git a/Dockerfile.sdk b/Dockerfile.sdk index 3019114930..19d0afe5f7 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -29,7 +29,7 @@ # # Base image on the minimum Triton container -ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.11-py3-min +ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.12-py3-min ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min index dec972eaf3..9147d70718 100644 --- a/Dockerfile.win10.min +++ b/Dockerfile.win10.min @@ -37,9 +37,9 @@ RUN choco install unzip -y # # Installing TensorRT # -ARG TENSORRT_VERSION=10.4.0.26 +ARG TENSORRT_VERSION=10.7.0.23 ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows.win10.cuda-12.6.zip" -ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip +ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip # COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP} ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP} RUN unzip /tmp/%TENSORRT_ZIP% @@ -51,9 +51,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}" # # Installing cuDNN # -ARG CUDNN_VERSION=9.4.0.58 +ARG CUDNN_VERSION=9.6.0.74 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip -ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.4.0.58_cuda12-archive.zip +ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.6.0.74_cuda12-archive.zip ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP} RUN unzip /tmp/%CUDNN_ZIP% RUN move cudnn-* cudnn @@ -75,20 +75,19 @@ RUN choco install git docker unzip -y # # Installing python # -ARG PYTHON_VERSION=3.10.11 +ARG PYTHON_VERSION=3.12.3 ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%" RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe" RUN pip install --upgrade wheel setuptools docker -RUN pip install grpcio-tools psutil LABEL PYTHON_VERSION=${PYTHON_VERSION} # # Installing CMake # -ARG CMAKE_VERSION=3.30.0 +ARG CMAKE_VERSION=3.30.5 RUN pip install cmake==%CMAKE_VERSION% ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake @@ -101,14 +100,16 @@ LABEL CMAKE_VERSION=${CMAKE_VERSION} # # Installing Visual Studio BuildTools: VS17 2022 # -ARG BUILDTOOLS_VERSION=17.10.35201.131 # Download collect.exe in case of an install failure. ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe" # Use the latest release channel. For more control, specify the location of an internal layout. # Download the Build Tools bootstrapper. 
# ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe -ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/28626b4b-f88f-4b55-a0cf-f3eaa2c643fb/e6c43d4dfb36338d954cdb3ad9010ab2a479e712088f4f6b016eadcc721bab28/vs_BuildTools.exe + +ARG BUILDTOOLS_VERSION=17.12.35506.116 +ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/5536698c-711c-4834-876f-2817d31a2ef2/58894fc272e86d3c3a6d85bf3a1df1e5a0685be8b9ab65d9f3cc5c2a8c6921cc/vs_BuildTools.exe + ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe # Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended. ARG VS_INSTALL_PATH_WP="C:\BuildTools" @@ -149,12 +150,13 @@ WORKDIR / # Installing CUDA # ARG CUDA_MAJOR=12 -ARG CUDA_MINOR=5 -ARG CUDA_PATCH=1 +ARG CUDA_MINOR=6 +ARG CUDA_PATCH=3 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH} ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \ cudart_${CUDA_MAJOR}.${CUDA_MINOR} \ nvml_dev_${CUDA_MAJOR}.${CUDA_MINOR} \ + nvrtc_${CUDA_MAJOR}.${CUDA_MINOR} nvrtc_dev_${CUDA_MAJOR}.${CUDA_MINOR} \ cublas_${CUDA_MAJOR}.${CUDA_MINOR} cublas_dev_${CUDA_MAJOR}.${CUDA_MINOR} \ cufft_${CUDA_MAJOR}.${CUDA_MINOR} cufft_dev_${CUDA_MAJOR}.${CUDA_MINOR} \ curand_${CUDA_MAJOR}.${CUDA_MINOR} curand_dev_${CUDA_MAJOR}.${CUDA_MINOR} \ @@ -175,7 +177,10 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%" -ARG CUDNN_VERSION=9.4.0.58 +ENV CUDA_VERSION=${CUDA_VERSION} +LABEL CUDA_VERSION="${CUDA_VERSION}" + +ARG CUDNN_VERSION=9.6.0.74 ENV CUDNN_VERSION ${CUDNN_VERSION} COPY --from=dependency_base /cudnn /cudnn RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\." @@ -183,13 +188,12 @@ RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\." RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\." LABEL CUDNN_VERSION="${CUDNN_VERSION}" -ARG TENSORRT_VERSION=10.4.0.26 +ARG TENSORRT_VERSION=10.7.0.23 ENV TRT_VERSION ${TENSORRT_VERSION} COPY --from=dependency_base /TensorRT /TensorRT RUN setx PATH "c:\TensorRT\lib;%PATH%" LABEL TENSORRT_VERSION="${TENSORRT_VERSION}" -LABEL CUDA_VERSION="${CUDA_VERSION}" # It is important that the entrypoint initialize VisualStudio # environment otherwise the build will fail. Also set # CMAKE_TOOLCHAIN_FILE and VCPKG_TARGET_TRIPLET so diff --git a/README.md b/README.md index 79e572b97d..b9c076cc60 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,8 @@ >[!WARNING] >You are currently on the `main` branch which tracks under-development progress ->towards the next release. The current release is version [2.52.0](https://github.com/triton-inference-server/server/releases/latest) ->and corresponds to the 24.11 container release on NVIDIA GPU Cloud (NGC). +>towards the next release. The current release is version [2.53.0](https://github.com/triton-inference-server/server/releases/latest) +>and corresponds to the 24.12 container release on NVIDIA GPU Cloud (NGC). Triton Inference Server is an open source inference serving software that streamlines AI inferencing. 
Triton enables teams to deploy any AI model from @@ -91,16 +91,16 @@ Inference Server with the ```bash # Step 1: Create the example model repository -git clone -b r24.11 https://github.com/triton-inference-server/server.git +git clone -b r24.12 https://github.com/triton-inference-server/server.git cd server/docs/examples ./fetch_models.sh # Step 2: Launch triton from the NGC Triton container -docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.11-py3 tritonserver --model-repository=/models +docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.12-py3 tritonserver --model-repository=/models # Step 3: Sending an Inference Request # In a separate console, launch the image_client example from the NGC Triton SDK container -docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.11-py3-sdk +docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.12-py3-sdk /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg # Inference should return the following diff --git a/TRITON_VERSION b/TRITON_VERSION index 7eb4ffb28a..fd02e9348b 100644 --- a/TRITON_VERSION +++ b/TRITON_VERSION @@ -1 +1 @@ -2.53.0dev +2.54.0dev diff --git a/build.py b/build.py index 8b5cb32f98..1cd03f5e99 100755 --- a/build.py +++ b/build.py @@ -71,10 +71,10 @@ # DEFAULT_TRITON_VERSION_MAP = { - "release_version": "2.53.0dev", - "triton_container_version": "24.12dev", - "upstream_container_version": "24.11", - "ort_version": "1.19.2", + "release_version": "2.54.0dev", + "triton_container_version": "25.01dev", + "upstream_container_version": "24.12", + "ort_version": "1.20.1", "ort_openvino_version": "2024.4.0", "standalone_openvino_version": "2024.4.0", "dcgm_version": "3.3.6", @@ -1238,6 +1238,8 @@ def create_dockerfile_linux( find /opt/tritonserver/python -maxdepth 1 -type f -name \\ "tritonfrontend-*.whl" | xargs -I {} pip install --upgrade {}[all] +RUN pip3 install -r python/openai/requirements.txt + """ if not FLAGS.no_core_build: # Add feature labels for SageMaker endpoint @@ -1934,6 +1936,10 @@ def core_build( os.path.join(install_dir, "include", "triton", "core"), ) + cmake_script.cpdir( + os.path.join(repo_dir, "python", "openai"), os.path.join(install_dir, "python") + ) + cmake_script.cp(os.path.join(repo_dir, "LICENSE"), install_dir) cmake_script.cp(os.path.join(repo_dir, "TRITON_VERSION"), install_dir) diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml index 4fcdd14bdb..be118becce 100644 --- a/deploy/aws/values.yaml +++ b/deploy/aws/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.11-py3 + imageName: nvcr.io/nvidia/tritonserver:24.12-py3 pullPolicy: IfNotPresent modelRepositoryPath: s3://triton-inference-server-repository/model_repository numGpus: 1 diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml index 4e3c87c387..aac221acd0 100644 --- a/deploy/fleetcommand/Chart.yaml +++ b/deploy/fleetcommand/Chart.yaml @@ -26,7 +26,7 @@ apiVersion: v1 # appVersion is the Triton version; update when changing release -appVersion: "2.51.0" +appVersion: "2.53.0" description: Triton Inference Server (Fleet Command) name: triton-inference-server # version is the Chart version; update when changing anything in the chart diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml index ff5513c7d7..30b1c331d1 100644 --- a/deploy/fleetcommand/values.yaml +++ b/deploy/fleetcommand/values.yaml @@
-27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.11-py3 + imageName: nvcr.io/nvidia/tritonserver:24.12-py3 pullPolicy: IfNotPresent numGpus: 1 serverCommand: tritonserver @@ -47,13 +47,13 @@ image: # # To set model control mode, uncomment and configure below # TODO: Fix the following url, it is invalid - # See https://github.com/triton-inference-server/server/blob/r24.11/docs/model_management.md + # See https://github.com/triton-inference-server/server/blob/r24.12/docs/model_management.md # for more details #- --model-control-mode=explicit|poll|none # # Additional server args # - # see https://github.com/triton-inference-server/server/blob/r24.11/README.md + # see https://github.com/triton-inference-server/server/blob/r24.12/README.md # for more details service: diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml index f79cb75134..7a27c61efa 100644 --- a/deploy/gcp/values.yaml +++ b/deploy/gcp/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.11-py3 + imageName: nvcr.io/nvidia/tritonserver:24.12-py3 pullPolicy: IfNotPresent modelRepositoryPath: gs://triton-inference-server-repository/model_repository numGpus: 1 diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml index c27a327e2f..6712d7d381 100644 --- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml +++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml @@ -33,7 +33,7 @@ metadata: namespace: default spec: containers: - - image: nvcr.io/nvidia/tritonserver:24.11-py3-sdk + - image: nvcr.io/nvidia/tritonserver:24.12-py3-sdk imagePullPolicy: Always name: nv-triton-client securityContext: diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh index a0c9762865..8c26ee5ed0 100755 --- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh +++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh @@ -27,9 +27,9 @@ export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/') export APP_NAME=tritonserver -export MAJOR_VERSION=2.51 -export MINOR_VERSION=2.51.0 -export NGC_VERSION=24.11-py3 +export MAJOR_VERSION=2.53 +export MINOR_VERSION=2.53.0 +export NGC_VERSION=24.12-py3 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml index 027deb1d2f..7ad8ba851b 100644 --- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml +++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml @@ -25,7 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
apiVersion: v1 -appVersion: "2.51" +appVersion: "2.53" description: Triton Inference Server name: triton-inference-server -version: 2.51.0 +version: 2.53.0 diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml index dfb992a543..673ec6acb3 100644 --- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml +++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml @@ -31,14 +31,14 @@ maxReplicaCount: 3 tritonProtocol: HTTP # HPA GPU utilization autoscaling target HPATargetAverageValue: 85 -modelRepositoryPath: gs://triton_sample_models/24.11 -publishedVersion: '2.51.0' +modelRepositoryPath: gs://triton_sample_models/24.12 +publishedVersion: '2.53.0' gcpMarketplace: true image: registry: gcr.io repository: nvidia-ngc-public/tritonserver - tag: 24.11-py3 + tag: 24.12-py3 pullPolicy: IfNotPresent # modify the model repository here to match your GCP storage bucket numGpus: 1 diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml index be46874dba..eefb209efb 100644 --- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml +++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml @@ -27,7 +27,7 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: '2.51.0' + publishedVersion: '2.53.0' publishedVersionMetadata: releaseNote: >- Initial release. diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml index 699fa04a68..1defe7ca42 100644 --- a/deploy/gke-marketplace-app/server-deployer/schema.yaml +++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml @@ -27,7 +27,7 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: '2.51.0' + publishedVersion: '2.53.0' publishedVersionMetadata: releaseNote: >- Initial release. @@ -89,7 +89,7 @@ properties: modelRepositoryPath: type: string title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc. - default: gs://triton_sample_models/24.11 + default: gs://triton_sample_models/24.12 image.ldPreloadPath: type: string title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable. 
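The chart values above are what Helm consumes at deploy time, so the bumped image tag, published version, and model repository path can also be exercised without editing values.yaml. A minimal sketch, assuming Helm 3 and a checkout of this repository; the release name `triton` and the bucket `gs://my-bucket/24.12` are hypothetical stand-ins:

```bash
# Install the GKE marketplace server-deployer chart, overriding the
# defaults shown in values.yaml above; --set values take precedence
# over the values shipped with the chart.
helm install triton ./deploy/gke-marketplace-app/server-deployer/chart/triton \
  --set image.tag=24.12-py3 \
  --set publishedVersion='2.53.0' \
  --set modelRepositoryPath=gs://my-bucket/24.12
```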
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md index 6a16fc9523..bdf655b2b0 100644 --- a/deploy/gke-marketplace-app/trt-engine/README.md +++ b/deploy/gke-marketplace-app/trt-engine/README.md @@ -33,7 +33,7 @@ ``` docker run --gpus all -it --network host \ --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \ - -v ~:/scripts nvcr.io/nvidia/tensorrt:24.11-py3 + -v ~:/scripts nvcr.io/nvidia/tensorrt:24.12-py3 pip install onnx six torch tf2onnx tensorflow @@ -57,7 +57,7 @@ mkdir -p engines python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh -gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/24.11/bert/1/model.plan +gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/24.12/bert/1/model.plan ``` -For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/24.11/` should be updated accordingly with the correct version. +For each Triton upgrade, the container version used to generate the model and the model path in GCS `gs://triton_sample_models/24.12/` should be updated accordingly. diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml index 77f1b47c5b..8e2fdcda6d 100644 --- a/deploy/k8s-onprem/values.yaml +++ b/deploy/k8s-onprem/values.yaml @@ -29,7 +29,7 @@ tags: loadBalancing: true image: - imageName: nvcr.io/nvidia/tritonserver:24.11-py3 + imageName: nvcr.io/nvidia/tritonserver:24.12-py3 pullPolicy: IfNotPresent modelRepositoryServer: < Replace with the IP Address of your file server > modelRepositoryPath: /srv/models diff --git a/deploy/oci/values.yaml b/deploy/oci/values.yaml index 1a62e52e7a..716ac24400 100644 --- a/deploy/oci/values.yaml +++ b/deploy/oci/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.11-py3 + imageName: nvcr.io/nvidia/tritonserver:24.12-py3 pullPolicy: IfNotPresent modelRepositoryPath: s3://https://.compat.objectstorage..oraclecloud.com:443/triton-inference-server-repository numGpus: 1 diff --git a/docs/backend_guide/vllm.rst b/docs/backend_guide/vllm.rst index 06be17128f..d28f2af5ab 100644 --- a/docs/backend_guide/vllm.rst +++ b/docs/backend_guide/vllm.rst @@ -1,3 +1,30 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +..
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######## vLLM ######## diff --git a/docs/client_guide/api_reference.rst b/docs/client_guide/api_reference.rst index 0493510e71..f626c1ac9b 100644 --- a/docs/client_guide/api_reference.rst +++ b/docs/client_guide/api_reference.rst @@ -1,3 +1,30 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #### API Reference #### diff --git a/docs/client_guide/in_process.rst b/docs/client_guide/in_process.rst index b1ee46a925..56ab778440 100644 --- a/docs/client_guide/in_process.rst +++ b/docs/client_guide/in_process.rst @@ -1,3 +1,30 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #### In-Process Triton Server API #### diff --git a/docs/client_guide/kserve.rst b/docs/client_guide/kserve.rst index e2ac33c45f..310435fc4e 100644 --- a/docs/client_guide/kserve.rst +++ b/docs/client_guide/kserve.rst @@ -1,3 +1,30 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #### KServe API #### diff --git a/docs/client_guide/kserve_extension.rst b/docs/client_guide/kserve_extension.rst index 7a78484499..dde6c4062b 100644 --- a/docs/client_guide/kserve_extension.rst +++ b/docs/client_guide/kserve_extension.rst @@ -1,3 +1,30 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. 
contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #### Extensions #### diff --git a/docs/client_guide/python.rst b/docs/client_guide/python.rst index 2610ce2d87..545f4f6042 100644 --- a/docs/client_guide/python.rst +++ b/docs/client_guide/python.rst @@ -1,3 +1,30 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #### Python #### diff --git a/docs/client_guide/python_readme.rst b/docs/client_guide/python_readme.rst index 91e3f1b26d..e7a79abe60 100644 --- a/docs/client_guide/python_readme.rst +++ b/docs/client_guide/python_readme.rst @@ -1,32 +1,32 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. 
documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + .. raw:: html - Triton Inference Server In-Process Python API [BETA] ==================================================== diff --git a/docs/conf.py b/docs/conf.py index 6c59e45c72..0b44f7c8b2 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -177,7 +177,7 @@ "switcher": { # use for local testing # "json_url": "http://localhost:8000/_static/switcher.json", - "json_url": "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/_static/switcher.json", + "json_url": "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/_static/switcher.json", "version_match": one_before if "dev" in version_long else version_short, }, "navbar_start": ["navbar-logo", "version-switcher"], diff --git a/docs/contents.rst b/docs/contents.rst index ff132c729d..555c433d85 100644 --- a/docs/contents.rst +++ b/docs/contents.rst @@ -1,27 +1,29 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .. toctree:: :hidden: diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md index 0622414609..fcb4ce14e9 100644 --- a/docs/customization_guide/build.md +++ b/docs/customization_guide/build.md @@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common: --repo-tag=core:` will default to the branch name. For example, if you are building on the -r24.11 branch, `<container tag>` will default to r24.11. If you are +r24.12 branch, `<container tag>` will default to r24.12. If you are building on any other branch (including the *main* branch) then `<container tag>` will default to "main". Therefore, you typically do not need to provide `<container tag>` at all (nor the preceding @@ -334,8 +334,8 @@ python build.py --cmake-dir=/build --build-dir=/tmp/citritonbuild If you are building on *main* branch then `<container tag>` will default to "main". If you are building on a release branch then `<container tag>` will default to the branch name. For example, if you -are building on the r24.11 branch, `<container tag>` will default to -r24.11. Therefore, you typically do not need to provide `<container tag>` +are building on the r24.12 branch, `<container tag>` will default to +r24.12. Therefore, you typically do not need to provide `<container tag>` at all (nor the preceding colon). You can use a different `<container tag>` for a component to instead use the corresponding branch/tag in the build. For example, if you have a branch called diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md index 8bddd46aeb..9f20a05347 100644 --- a/docs/customization_guide/compose.md +++ b/docs/customization_guide/compose.md @@ -46,8 +46,8 @@ The `compose.py` script can be found in the Simply clone the repository and run `compose.py` to create a custom container. Note: Created container version will depend on the branch that was cloned. For example branch - [r24.11](https://github.com/triton-inference-server/server/tree/r24.11) -should be used to create a image based on the NGC 24.11 Triton release. + [r24.12](https://github.com/triton-inference-server/server/tree/r24.12) +should be used to create an image based on the NGC 24.12 Triton release.
`compose.py` provides `--backend`, `--repoagent` options that allow you to specify which backends and repository agents to include in the custom image. @@ -79,20 +79,20 @@ For example, running ``` python3 compose.py --backend pytorch --repoagent checksum ``` -on branch [r24.11](https://github.com/triton-inference-server/server/tree/r24.11) pulls: -- `min` container `nvcr.io/nvidia/tritonserver:24.11-py3-min` -- `full` container `nvcr.io/nvidia/tritonserver:24.11-py3` +on branch [r24.12](https://github.com/triton-inference-server/server/tree/r24.12) pulls: +- `min` container `nvcr.io/nvidia/tritonserver:24.12-py3-min` +- `full` container `nvcr.io/nvidia/tritonserver:24.12-py3` Alternatively, users can specify the version of Triton container to pull from any branch by either: 1. Adding flag `--container-version <container version>` to branch ``` -python3 compose.py --backend pytorch --repoagent checksum --container-version 24.11 +python3 compose.py --backend pytorch --repoagent checksum --container-version 24.12 ``` 2. Specifying `--image min,<min container image> --image full,<full container image>`. The user is responsible for specifying compatible `min` and `full` containers. ``` -python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.11-py3-min --image full,nvcr.io/nvidia/tritonserver:24.11-py3 +python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.12-py3-min --image full,nvcr.io/nvidia/tritonserver:24.12-py3 ``` Method 1 and 2 will result in the same composed container. Furthermore, `--image` flag overrides the `--container-version` flag when both are specified. @@ -103,8 +103,8 @@ Note: 2. vLLM and TensorRT-LLM backends are currently not supported backends for `compose.py`. If you want to build additional backends on top of these backends, it would be better to [build it yourself](#build-it-yourself) by using -`nvcr.io/nvidia/tritonserver:24.11-vllm-python-py3` or -`nvcr.io/nvidia/tritonserver:24.11-trtllm-python-py3` as a `min` container. +`nvcr.io/nvidia/tritonserver:24.12-vllm-python-py3` or +`nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3` as a `min` container. ### CPU-only container composition diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md index 39891b3177..a85a10f48b 100644 --- a/docs/customization_guide/test.md +++ b/docs/customization_guide/test.md @@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops ``` This will create multiple model repositories in /tmp/\<version\>/qa_* -(for example /tmp/24.11/qa_model_repository). The TensorRT models +(for example /tmp/24.12/qa_model_repository). The TensorRT models will be created for the GPU on the system that CUDA considers device 0 (zero). If you have multiple GPUs on your system see the documentation in the scripts for how to target a specific GPU. diff --git a/docs/generate_docs.py b/docs/generate_docs.py index 065c14de1e..048bd77035 100755 --- a/docs/generate_docs.py +++ b/docs/generate_docs.py @@ -41,11 +41,11 @@ """ TODO: Needs to handle cross-branch linkage. -For example, server/docs/user_guide/architecture.md on branch 24.11 links to +For example, server/docs/user_guide/architecture.md on branch 24.12 links to server/docs/user_guide/model_analyzer.md on main branch. In this case, the hyperlink of model_analyzer.md should be a URL instead of relative path. -Another example can be server/docs/user_guide/model_analyzer.md on branch 24.11 +Another example can be server/docs/user_guide/model_analyzer.md on branch 24.12 links to a file in server repo with relative path.
Currently all URLs are hardcoded to main branch. We need to make sure that the URL actually points to the correct branch. We also need to handle cases like deprecated or removed files from diff --git a/docs/getting_started/llm.md b/docs/getting_started/llm.md index b5f738c3d5..cecf565f51 100644 --- a/docs/getting_started/llm.md +++ b/docs/getting_started/llm.md @@ -1,3 +1,31 @@ + + # Deploying Phi-3 Model with Triton and TRT-LLM This guide captures the steps to build Phi-3 with TRT-LLM and deploy with Triton Inference Server. It also shows how to use GenAI-Perf to run benchmarks to measure model performance in terms of throughput and latency. @@ -326,7 +354,7 @@ All config files inside /tensorrtllm\_backend/all\_models/inflight\_batcher\_llm
ensemble/config.pbtxt - # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -836,7 +864,7 @@ All config files inside /tensorrtllm\_backend/all\_models/inflight\_batcher\_llm
postprocessing/config.pbtxt - # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -965,7 +993,7 @@ All config files inside /tensorrtllm\_backend/all\_models/inflight\_batcher\_llm
preprocessing/config.pbtxt - # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -1160,7 +1188,7 @@ All config files inside /tensorrtllm\_backend/all\_models/inflight\_batcher\_llm tensorrt_llm/config.pbtxt - # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/docs/getting_started/quick_deployment_by_backend.rst b/docs/getting_started/quick_deployment_by_backend.rst index c8e461c00c..aefa56787b 100644 --- a/docs/getting_started/quick_deployment_by_backend.rst +++ b/docs/getting_started/quick_deployment_by_backend.rst @@ -1,3 +1,30 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. * Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #### Quick Deployment Guide by backend #### diff --git a/docs/getting_started/quick_start.rst b/docs/getting_started/quick_start.rst index 8af21534a3..27f100e3cd 100644 --- a/docs/getting_started/quick_start.rst +++ b/docs/getting_started/quick_start.rst @@ -1,32 +1,32 @@ +.. +.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. +.. Redistribution and use in source and binary forms, with or without +.. modification, are permitted provided that the following conditions +.. are met: +.. * Redistributions of source code must retain the above copyright +.. notice, this list of conditions and the following disclaimer. +.. * Redistributions in binary form must reproduce the above copyright +.. notice, this list of conditions and the following disclaimer in the +.. documentation and/or other materials provided with the distribution. +.. 
* Neither the name of NVIDIA CORPORATION nor the names of its +.. contributors may be used to endorse or promote products derived +.. from this software without specific prior written permission. +.. +.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + .. raw:: html - Quickstart ========== diff --git a/docs/introduction/compatibility.md b/docs/introduction/compatibility.md index da72b5c2e8..d15a866a86 100644 --- a/docs/introduction/compatibility.md +++ b/docs/introduction/compatibility.md @@ -37,7 +37,9 @@ | Triton release version | NGC Tag | Python version | Torch version | TensorRT version | TensorRT-LLM version | CUDA version | CUDA Driver version | Size | | --- | --- | --- | --- | --- | --- | --- | --- | --- | -| 24.10 | nvcr.io/nvidia/tritonserver:24.10-trtllm-python-py3 | Python 3.10.12 | 2.4.0a0%2B3bcc3cddb5.nv24.7 | 10.4.0 | 0.14.0 | 12.5.1.007 | 555.42.06 | 21G | +| 24.12 | nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3 | Python 3.12.3 | 2.6.0a0%2Bdf5bbc09d1.nv24.11 | 10.7.0 | 0.16.0 | 12.6.3 | 560.35.05 | 22G | +| 24.11 | nvcr.io/nvidia/tritonserver:24.11-trtllm-python-py3 | Python 3.10.12 | 2.5.0a0%2Be000cf0ad9.nv24.10 | 10.6.0 | 0.15.0 | 12.6.3 | 555.42.06 | 24.8G | +| 24.10 | nvcr.io/nvidia/tritonserver:24.10-trtllm-python-py3 | Python 3.10.12 | 2.4.0a0%2B3bcc3cddb5.nv24.7 | 10.4.0 | 0.14.0 | 12.5.1.007 | 555.42.06 | 23.3G | | 24.09 | nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3 | Python 3.10.12 | 2.4.0a0%2B3bcc3cddb5.nv24.7 | 10.4.0 | 0.13.0 | 12.5.1.007 | 555.42.06 | 21G | | 24.08 | nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3 | Python 3.10.12 | 2.4.0a0%2B3bcc3cddb5.nv24.7 | 10.3.0 | 0.12.0 | 12.5.1.007 | 555.42.06 | 21G | | 24.07 | nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3 | Python 3.10.12 | 2.4.0a0%2B07cecf4168.nv24.5 | 10.1.0 | 0.11.0 | 12.4.1.003 | 550.54.15 | 23G | @@ -49,7 +51,9 @@ | Triton release version | NGC Tag | Python version | vLLM version | CUDA version | CUDA Driver version | Size | | --- | --- | --- | --- | --- | --- | --- | -| 24.10 | nvcr.io/nvidia/tritonserver:24.10-vllm-python-py3 | Python 3.10.12 | 0.5.5 | 12.6.2.004 | 560.35.03 | 19G | +| 24.12 | nvcr.io/nvidia/tritonserver:24.12-vllm-python-py3 | Python 3.12.3 | 0.5.5 | 12.6.3.004 | 560.35.05 | 20G | +| 24.11 | nvcr.io/nvidia/tritonserver:24.11-vllm-python-py3 | Python 3.12.3 | 0.5.5 | 12.6.3.001 | 560.35.05 | 22.1G | +| 24.10 | nvcr.io/nvidia/tritonserver:24.10-vllm-python-py3 | Python 3.10.12 | 0.5.5 | 12.6.2.004 | 560.35.03 | 21G | | 24.09 | nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3 | Python 3.10.12 | 0.5.3.post1 | 12.6.1.006 | 560.35.03 | 19G | | 24.08 | nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3 | Python 3.10.12 | 0.5.0 post1 | 12.6.0.022 | 560.35.03 | 19G | | 24.07 | nvcr.io/nvidia/tritonserver:24.07-vllm-python-py3 | Python 3.10.12 
| 0.5.0 post1 | 12.5.1 | 555.42.06 | 19G | @@ -61,6 +65,8 @@ | Triton release version | ONNX Runtime | | --- | --- | +| 24.12 | 1.20.1 | +| 24.11 | 1.19.2 | | 24.10 | 1.19.2 | | 24.09 | 1.19.2 | | 24.08 | 1.18.1 | diff --git a/docs/introduction/index.md b/docs/introduction/index.md index 306c2082e7..4ac740f36b 100644 --- a/docs/introduction/index.md +++ b/docs/introduction/index.md @@ -54,11 +54,11 @@ the development and deployment of production AI. ## Triton Architecture The following figure shows the Triton Inference Server high-level -architecture. The [model repository](user_guide/model_repository.md) is a +architecture. The [model repository](../user_guide/model_repository.md) is a file-system based repository of the models that Triton will make available for inferencing. Inference requests arrive at the server via -either [HTTP/REST or GRPC](customization_guide/inference_protocols.md) or by the [C -API](customization_guide/inference_protocols.md) and are then routed to the appropriate per-model +either [HTTP/REST or GRPC](../customization_guide/inference_protocols.md) or by the [C +API](../customization_guide/inprocess_c_api.md) and are then routed to the appropriate per-model scheduler. Triton implements [multiple scheduling and batching algorithms](#models-and-schedulers) that can be configured on a model-by-model basis. Each model's scheduler optionally performs @@ -75,7 +75,7 @@ custom pre- and post-processing operations or even a new deep-learning framework. The models being served by Triton can be queried and controlled by a -dedicated [model management API](user_guide/model_management.md) that is +dedicated [model management API](../user_guide/model_management.md) that is available by HTTP/REST or GRPC protocol, or by the C API. Readiness and liveness health endpoints and utilization, throughput @@ -93,25 +93,25 @@ Major features include: - [Supports multiple machine learning frameworks](https://github.com/triton-inference-server/fil_backend) - [Concurrent model - execution](user_guide/model_execution.md#concurrent-model-execution) -- [Dynamic batching](user_guide/batcher.md#dynamic-batcher) -- [Sequence batching](user_guide/batcher.md#sequence-batcher) and - [implicit state management](user_guide/implicit_state_management.md#implicit-state-management) + execution](../user_guide/model_execution.md#concurrent-model-execution) +- [Dynamic batching](../user_guide/batcher.md#dynamic-batcher) +- [Sequence batching](../user_guide/batcher.md#sequence-batcher) and + [implicit state management](../user_guide/implicit_state_management.md#implicit-state-management) for stateful models - Provides [Backend API](https://github.com/triton-inference-server/backend) that allows adding custom backends and pre/post processing operations - Model pipelines using - [Ensembling](user_guide/ensemble_models.md#ensemble-models) or [Business + [Ensembling](../user_guide/ensemble_models.md#ensemble-models) or [Business Logic Scripting - (BLS)](user_guide/bls.md#business-logic-scripting) + (BLS)](../user_guide/bls.md#business-logic-scripting) - [HTTP/REST and GRPC inference - protocols](customization_guide/inference_protocols.md) based on the community + protocols](../customization_guide/inference_protocols.md) based on the community developed [KServe protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2) -- A [C API](customization_guide/inprocess_c_api.md) and - [Java API](customization_guide/inprocess_java_api.md) +- A [C API](../customization_guide/inprocess_c_api.md) and + [Java 
API](../customization_guide/inprocess_java_api.md) allow Triton to link directly into your application for edge and other in-process use cases -- [Metrics](user_guide/metrics.md) indicating GPU utilization, server +- [Metrics](../user_guide/metrics.md) indicating GPU utilization, server throughput, server latency, and more Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learning-ai/triton-tensorrt-newsletter/) and stay current on the latest product updates, bug fixes, content, best diff --git a/docs/introduction/release_notes.md b/docs/introduction/release_notes.md index 63f72e0c15..1901985a7e 100644 --- a/docs/introduction/release_notes.md +++ b/docs/introduction/release_notes.md @@ -25,9 +25,9 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --> -# [Triton Inference Server Release 24.10](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-24-10.html#rel-24-10) +# [Triton Inference Server Release 24.12](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-24-12.html#rel-24-12) -The Triton Inference Server container image, release 24.10, is available on [NGC](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) and is open source on [GitHub](https://github.com/triton-inference-server/server). +The Triton Inference Server container image, release 24.12, is available on [NGC](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) and is open source on [GitHub](https://github.com/triton-inference-server/server). ## **Contents of the Triton Inference Server container** @@ -38,60 +38,63 @@ For a complete list of what the container includes, refer to [Deep Learning Fram The container also includes the following: -- [Ubuntu 22.04](http://releases.ubuntu.com/22.04/) including [Python 3.10](https://www.python.org/downloads/release/python-3100/) +- [Ubuntu 24.04](http://releases.ubuntu.com/24.04/) including [Python 3.12](https://www.python.org/downloads/release/python-3120/) -- [NVIDIA CUDA 12.6.2](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) +- [NVIDIA CUDA 12.6.3](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) -- [NVIDIA cuBLAS 12.6.3.3](https://docs.nvidia.com/cuda/cublas/index.html) +- [NVIDIA cuBLAS 12.6.4.1](https://docs.nvidia.com/cuda/cublas/index.html) -- [cuDNN 9.5.0.50](https://docs.nvidia.com/deeplearning/cudnn/release-notes/) +- [cuDNN 9.6.0.74](https://docs.nvidia.com/deeplearning/cudnn/release-notes/) -- [NVIDIA NCCL 2.22.3](https://docs.nvidia.com/deeplearning/nccl/release-notes/) (optimized for [NVIDIA NVLink](http://www.nvidia.com/object/nvlink.html)®) +- [NVIDIA NCCL 2.23.4](https://docs.nvidia.com/deeplearning/nccl/release-notes/) (optimized for [NVIDIA NVLink](http://www.nvidia.com/object/nvlink.html)®) -- [NVIDIA TensorRT™ 10.5.0.18](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html) +- [NVIDIA TensorRT™ 10.7.0.23](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html) - OpenUCX 1.15.0 - GDRCopy 2.3 -- NVIDIA HPC-X 2.20 +- NVIDIA HPC-X 2.21 - OpenMPI 4.1.7 - [FIL](https://github.com/triton-inference-server/fil_backend) -- [NVIDIA DALI® 1.42](https://docs.nvidia.com/deeplearning/dali/release-notes/index.html) +- [NVIDIA DALI® 1.44](https://docs.nvidia.com/deeplearning/dali/release-notes/index.html) - [nvImageCodec 0.2.0.7](https://docs.nvidia.com/cuda/nvimagecodec/release_notes_v0.2.0.html) -- ONNX Runtime
1.19.2 +- ONNX Runtime 1.20.1 -- Intel[ OpenVINO ](https://github.com/openvinotoolkit/openvino/tree/2022.1.0)2024.0.0 +- Intel[ OpenVINO ](https://github.com/openvinotoolkit/openvino/tree/2022.1.0)2024.4.0 - DCGM 3.2.6 -- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/) version [release/0.13.0](https://github.com/NVIDIA/TensorRT-LLM/tree/v0.13.0) +- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/) version [release/0.15.0](https://github.com/NVIDIA/TensorRT-LLM/tree/v0.15.0) -- [vLLM](https://github.com/vllm-project/vllm) version 0.5.3 post 1 +- [vLLM](https://github.com/vllm-project/vllm) version 0.5.5 ## **Driver Requirements** -Release 24.10 is based on [CUDA 12.6.2](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) which requires [NVIDIA Driver](http://www.nvidia.com/Download/index.aspx?lang=en-us) release 560 or later. However, if you are running on a data center GPU (for example, T4 or any other data center GPU), you can use NVIDIA driver release 470.57 (or later R470), 525.85 (or later R525), 535.86 (or later R535), or 545.23 (or later R545). +Release 24.12 is based on [CUDA 12.6.3](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) which requires [NVIDIA Driver](http://www.nvidia.com/Download/index.aspx?lang=en-us) release 560 or later. However, if you are running on a data center GPU (for example, T4 or any other data center GPU), you can use NVIDIA driver release 470.57 (or later R470), 525.85 (or later R525), 535.86 (or later R535), or 545.23 (or later R545). The CUDA driver's compatibility package only supports particular drivers. Thus, users should upgrade from all R418, R440, R450, R460, R510, R520, R530, R545 and R555 drivers, which are not forward-compatible with CUDA 12.6. For a complete list of supported drivers, see the [CUDA Application Compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#use-the-right-compat-package) topic. For more information, see [CUDA Compatibility and Upgrades](https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#cuda-compatibility-and-upgrades). ## **GPU Requirements** -Release 24.10 supports CUDA compute capability 6.0 and later. This corresponds to GPUs in the NVIDIA Pascal, NVIDIA Volta™, NVIDIA Turing™, NVIDIA Ampere architecture, NVIDIA Hopper™, and NVIDIA Ada Lovelace architecture families. For a list of GPUs to which this compute capability corresponds, see [CUDA GPUs](https://developer.nvidia.com/cuda-gpus). For additional support details, see [Deep Learning Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html). +Release 24.12 supports CUDA compute capability 6.0 and later. This corresponds to GPUs in the NVIDIA Pascal, NVIDIA Volta™, NVIDIA Turing™, NVIDIA Ampere architecture, NVIDIA Hopper™, and NVIDIA Ada Lovelace architecture families. For a list of GPUs to which this compute capability corresponds, see [CUDA GPUs](https://developer.nvidia.com/cuda-gpus). For additional support details, see [Deep Learning Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html). ## **Key Features and Enhancements** This Inference Server release includes the following key features and enhancements. -- Optimized vLLM performance with custom metrics. +- [vLLM backend health check](https://github.com/triton-inference-server/vllm_backend/blob/r24.12/docs/health_check.md) may be optionally enabled, which unloads the model if the vLLM engine health check fails.
+- The vLLM backend supports sending [additional outputs](https://github.com/triton-inference-server/vllm_backend/blob/r24.12/docs/additional_outputs.md) from vLLM if requested.
+- Improved server stability during gRPC client cancellation.
+
 ## **Known Issues**
 
 - Numpy 2.x is not currently supported for Python Backend models and may cause them to return empty tensors unexpectedly; please use Numpy 1.x until support is added.
@@ -121,4 +124,5 @@ This Inference Server release includes the following key features and enhancemen
   - GPU tensors
   - CPU and GPU-related metrics
   - Custom execution environments
-  - The model load/unload APIs
\ No newline at end of file
+  - The model load/unload APIs
+- The latest GenAI-Perf package on pypi.org is version 0.0.9dev, while the latest Triton SDK container (24.12) contains GenAI-Perf version 0.0.8.
diff --git a/docs/perf_benchmark/genai-perf-README.rst b/docs/perf_benchmark/genai-perf-README.rst
index ea6a2d0d01..c4a3c7d73d 100644
--- a/docs/perf_benchmark/genai-perf-README.rst
+++ b/docs/perf_benchmark/genai-perf-README.rst
@@ -1,32 +1,32 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 .. raw:: html
-
 GenAI-Perf
 ==========
diff --git a/docs/perf_benchmark/genai_perf.rst b/docs/perf_benchmark/genai_perf.rst
index d621431061..175662477f 100644
--- a/docs/perf_benchmark/genai_perf.rst
+++ b/docs/perf_benchmark/genai_perf.rst
@@ -1,3 +1,30 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 ####
 GenAI Performance Analyzer
 ####
diff --git a/docs/perf_benchmark/model-analyzer-README.rst b/docs/perf_benchmark/model-analyzer-README.rst
index 1c31a578ff..f31e7ca633 100644
--- a/docs/perf_benchmark/model-analyzer-README.rst
+++ b/docs/perf_benchmark/model-analyzer-README.rst
@@ -1,20 +1,32 @@
-.. raw:: html
-
-   |License|
diff --git a/docs/perf_benchmark/model_analyzer.rst b/docs/perf_benchmark/model_analyzer.rst
index d66005c336..c29a96aa92 100644
--- a/docs/perf_benchmark/model_analyzer.rst
+++ b/docs/perf_benchmark/model_analyzer.rst
@@ -1,3 +1,30 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 ####
 Model Analyzer
 ####
diff --git a/docs/perf_benchmark/perf-analyzer-README.rst b/docs/perf_benchmark/perf-analyzer-README.rst
index f51d19deb9..4f678cfdba 100644
--- a/docs/perf_benchmark/perf-analyzer-README.rst
+++ b/docs/perf_benchmark/perf-analyzer-README.rst
@@ -1,32 +1,32 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 .. raw:: html
-
 Triton Performance Analyzer
 ===========================
diff --git a/docs/perf_benchmark/perf_analyzer.rst b/docs/perf_benchmark/perf_analyzer.rst
index 0aa5172c88..d6c6156a62 100644
--- a/docs/perf_benchmark/perf_analyzer.rst
+++ b/docs/perf_benchmark/perf_analyzer.rst
@@ -1,3 +1,30 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 ####
 Performance Analyzer
 ####
diff --git a/docs/scaling_guide/scaling_guide.rst b/docs/scaling_guide/scaling_guide.rst
index f4d252f77e..57b4486ff0 100644
--- a/docs/scaling_guide/scaling_guide.rst
+++ b/docs/scaling_guide/scaling_guide.rst
@@ -1,3 +1,30 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 ########
 Scaling guide
 ########
diff --git a/docs/server_guide/features.rst b/docs/server_guide/features.rst
index a14fa711c2..9a44645e3e 100644
--- a/docs/server_guide/features.rst
+++ b/docs/server_guide/features.rst
@@ -1,3 +1,30 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 ########
 Features
 ########
diff --git a/docs/server_guide/model_pipelines.rst b/docs/server_guide/model_pipelines.rst
index 5f4dcffaaa..e12225f40d 100644
--- a/docs/server_guide/model_pipelines.rst
+++ b/docs/server_guide/model_pipelines.rst
@@ -1,3 +1,30 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 ########
 Model Pipelines
 ########
diff --git a/docs/server_guide/state_management.rst b/docs/server_guide/state_management.rst
index 75f6b44b23..284bb4b5fe 100644
--- a/docs/server_guide/state_management.rst
+++ b/docs/server_guide/state_management.rst
@@ -1,3 +1,30 @@
+..
+.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. Redistribution and use in source and binary forms, with or without
+.. modification, are permitted provided that the following conditions
+.. are met:
+..  * Redistributions of source code must retain the above copyright
+..    notice, this list of conditions and the following disclaimer.
+..  * Redistributions in binary form must reproduce the above copyright
+..    notice, this list of conditions and the following disclaimer in the
+..    documentation and/or other materials provided with the distribution.
+..  * Neither the name of NVIDIA CORPORATION nor the names of its
+..    contributors may be used to endorse or promote products derived
+..    from this software without specific prior written permission.
+..
+.. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+.. EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.. PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+.. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+.. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+.. OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 ########
 State Management
 ########
diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md
index 3787a89a60..faf66de25c 100644
--- a/docs/user_guide/custom_operations.md
+++ b/docs/user_guide/custom_operations.md
@@ -64,7 +64,7 @@
 simple way to ensure you are using the correct version of TensorRT is to use the [NGC TensorRT container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt) corresponding to the Triton container. For example, if you are using
-the 24.11 version of Triton, use the 24.11 version of the TensorRT
+the 24.12 version of Triton, use the 24.12 version of the TensorRT
 container.
 
 ## TensorFlow
@@ -123,7 +123,7 @@
 simple way to ensure you are using the correct version of TensorFlow is to use the [NGC TensorFlow container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow) corresponding to the Triton container. For example, if you are using
-the 24.11 version of Triton, use the 24.11 version of the TensorFlow
+the 24.12 version of Triton, use the 24.12 version of the TensorFlow
 container.
 
 ## PyTorch
@@ -167,7 +167,7 @@
 simple way to ensure you are using the correct version of PyTorch is to use the [NGC PyTorch container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) corresponding to the Triton container. For example, if you are using
-the 24.11 version of Triton, use the 24.11 version of the PyTorch
+the 24.12 version of Triton, use the 24.12 version of the PyTorch
 container.
 
 ## ONNX
diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
index ff21175bbe..4d1f067662 100644
--- a/docs/user_guide/performance_tuning.md
+++ b/docs/user_guide/performance_tuning.md
@@ -235,7 +235,7 @@
 with a `tritonserver` binary.
 
 ```bash
 # Start server container
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.11-py3
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.12-py3
 
 # Start serving your models
 tritonserver --model-repository=/mnt/models
@@ -284,7 +284,7 @@
 by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u
 
 ```bash
 # Start the SDK container interactively
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.11-py3-sdk
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.12-py3-sdk
 
 # Benchmark model being served from step 3
 perf_analyzer -m densenet_onnx --concurrency-range 1:4
diff --git a/python/openai/openai_frontend/frontend/fastapi/__init__.py b/python/openai/openai_frontend/frontend/fastapi/__init__.py
new file mode 100644
index 0000000000..f3dec540e2
--- /dev/null
+++ b/python/openai/openai_frontend/frontend/fastapi/__init__.py
@@ -0,0 +1,25 @@
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/python/openai/openai_frontend/frontend/fastapi/__init__py b/python/openai/openai_frontend/frontend/fastapi/__init__py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/python/openai/requirements.txt b/python/openai/requirements.txt
index 46807fcc9c..0d3fdbb8c1 100644
--- a/python/openai/requirements.txt
+++ b/python/openai/requirements.txt
@@ -25,8 +25,11 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 # FastAPI Application
-fastapi==0.111.1
+fastapi==0.115.6
 # Fix httpx version to avoid bug in openai library:
 # https://community.openai.com/t/error-with-openai-1-56-0-client-init-got-an-unexpected-keyword-argument-proxies/1040332/3
 httpx==0.27.2
 openai==1.40.6
+# Minimum starlette version needed to address CVE:
+# https://github.com/advisories/GHSA-f96h-pmfr-66vw
+starlette>=0.40.0
diff --git a/qa/L0_openai/test.sh b/qa/L0_openai/test.sh
index c910c204ac..2bff43fafe 100755
--- a/qa/L0_openai/test.sh
+++ b/qa/L0_openai/test.sh
@@ -29,11 +29,12 @@
 function install_deps() {
     # Install python bindings for tritonserver and tritonfrontend
-    pip install /opt/tritonserver/python/triton*.whl
+    # pip install /opt/tritonserver/python/triton*.whl
 
     # Install application/testing requirements
     pushd openai/
-    pip install -r requirements.txt
+    # NOTE: Should be pre-installed in container, but can uncomment if needed
+    # pip install -r requirements.txt
     pip install -r requirements-test.txt
 
     if [ "${IMAGE_KIND}" == "TRTLLM" ]; then
@@ -49,13 +50,17 @@ function prepare_vllm() {
 }
 
 function prepare_tensorrtllm() {
+    # FIXME: Remove when testing TRT-LLM containers built from source
+    pip install -r requirements.txt
+
     MODEL="llama-3-8b-instruct"
     MODEL_REPO="tests/tensorrtllm_models"
     rm -rf ${MODEL_REPO}
 
-    # FIXME: This will require an upgrade each release to match the TRT-LLM version
+    # FIXME: This may require an upgrade each release to match the TRT-LLM version,
+    # and it would likely be easier to use trtllm-build directly for test purposes.
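+    # A sketch of that alternative (hypothetical checkpoint path; assumes a
+    # converted TRT-LLM checkpoint already exists on disk):
+    #   trtllm-build --checkpoint_dir /tmp/${MODEL}-ckpt \
+    #                --output_dir ${MODEL_REPO}/tensorrt_llm/1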
     # Use Triton CLI to prepare model repository for testing
-    pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.10
+    pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.1
     # NOTE: Could use ENGINE_DEST_PATH set to NFS mount for pre-built engines in future
     triton import \
         --model ${MODEL} \
diff --git a/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py b/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py
index 15f16da352..c9883c9133 100755
--- a/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py
+++ b/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,6 +38,8 @@
 import numpy as np
 import sequence_util as su
 import test_util as tu
+import tritonclient.http as httpclient
+from tritonclient.utils import InferenceServerException, np_to_triton_dtype
 
 _test_system_shared_memory = bool(int(os.environ.get("TEST_SYSTEM_SHARED_MEMORY", 0)))
 _test_cuda_shared_memory = bool(int(os.environ.get("TEST_CUDA_SHARED_MEMORY", 0)))
@@ -77,6 +79,12 @@ def get_expected_result(self, expected_result, corrid, value, trial, flag_str=No
         expected_result += corrid
         return expected_result
 
+    def data_type_to_string(self, dtype):
+        if dtype == "TYPE_STRING":
+            return "BYTES"
+        else:
+            return dtype.replace("TYPE_", "")
+
     def test_skip_batch(self):
         # Test model instances together are configured with
         # total-batch-size 4. Send four sequences in parallel where
@@ -221,6 +229,78 @@ def test_skip_batch(self):
         self.cleanup_shm_regions(precreated_shm2_handles)
         self.cleanup_shm_regions(precreated_shm3_handles)
 
+    def test_corrid_data_type(self):
+        model_name = "add_sub"
+        expected_corrid_dtype = os.environ["TRITONSERVER_CORRID_DATA_TYPE"]
+
+        for corrid, corrid_dtype in [("corrid", "TYPE_STRING"), (123, "TYPE_UINT64")]:
+            # Check if the corrid data type matches the expected corrid data type specified in the model config
+            dtypes_match = True
+            if (corrid_dtype == "TYPE_STRING") and (
+                expected_corrid_dtype != "TYPE_STRING"
+            ):
+                dtypes_match = False
+            elif (corrid_dtype == "TYPE_UINT64") and (
+                expected_corrid_dtype
+                not in ["TYPE_UINT32", "TYPE_INT32", "TYPE_UINT64", "TYPE_INT64"]
+            ):
+                dtypes_match = False
+
+            with httpclient.InferenceServerClient("localhost:8000") as client:
+                input0_data = np.random.rand(16).astype(np.float32)
+                input1_data = np.random.rand(16).astype(np.float32)
+                inputs = [
+                    httpclient.InferInput(
+                        "INPUT0",
+                        input0_data.shape,
+                        np_to_triton_dtype(input0_data.dtype),
+                    ),
+                    httpclient.InferInput(
+                        "INPUT1",
+                        input1_data.shape,
+                        np_to_triton_dtype(input1_data.dtype),
+                    ),
+                ]
+
+                inputs[0].set_data_from_numpy(input0_data)
+                inputs[1].set_data_from_numpy(input1_data)
+
+                if not dtypes_match:
+                    with self.assertRaises(InferenceServerException) as e:
+                        client.infer(
+                            model_name,
+                            inputs,
+                            sequence_id=corrid,
+                            sequence_start=True,
+                            sequence_end=False,
+                        )
+                    err_str = str(e.exception)
+                    self.assertIn(
+                        f"sequence batching control 'CORRID' data-type is '{self.data_type_to_string(corrid_dtype)}', but model '{model_name}' expects '{self.data_type_to_string(expected_corrid_dtype)}'",
+                        err_str,
+                    )
+                else:
+                    response = client.infer(
+                        model_name,
+                        inputs,
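+                        # Matching CORRID data type: this request is expected to
+                        # succeed, and the add_sub outputs are verified below.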
+                        sequence_id=corrid,
+                        sequence_start=True,
+                        sequence_end=False,
+                    )
+                    response.get_response()
+                    output0_data = response.as_numpy("OUTPUT0")
+                    output1_data = response.as_numpy("OUTPUT1")
+
+                    self.assertTrue(
+                        np.allclose(input0_data + input1_data, output0_data),
+                        "add_sub example error: incorrect sum",
+                    )
+
+                    self.assertTrue(
+                        np.allclose(input0_data - input1_data, output1_data),
+                        "add_sub example error: incorrect difference",
+                    )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_sequence_corrid_batcher/test.sh b/qa/L0_sequence_corrid_batcher/test.sh
index 8d114a395a..3948cd7445 100755
--- a/qa/L0_sequence_corrid_batcher/test.sh
+++ b/qa/L0_sequence_corrid_batcher/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -121,6 +121,59 @@ for model_trial in 4; do
     done
 done
 
+# Test correlation ID data type
+mkdir -p corrid_data_type/add_sub/1
+cp ../python_models/add_sub/model.py corrid_data_type/add_sub/1
+
+for corrid_data_type in TYPE_STRING TYPE_UINT32 TYPE_INT32 TYPE_UINT64 TYPE_INT64; do
+    (cd corrid_data_type/add_sub && \
+        cp ../../../python_models/add_sub/config.pbtxt . && \
+        echo "sequence_batching { \
+            control_input [{ \
+                name: \"CORRID\" \
+                control [{ \
+                    kind: CONTROL_SEQUENCE_CORRID \
+                    data_type: $corrid_data_type \
+                }]
+            }] \
+        }" >> config.pbtxt)
+    MODEL_DIR=corrid_data_type
+
+    for i in test_corrid_data_type ; do
+        export TRITONSERVER_CORRID_DATA_TYPE=$corrid_data_type
+        SERVER_ARGS="--model-repository=`pwd`/$MODEL_DIR"
+        SERVER_LOG="./$i.$MODEL_DIR.server.log"
+        run_server
+        if [ "$SERVER_PID" == "0" ]; then
+            echo -e "\n***\n*** Failed to start $SERVER\n***"
+            cat $SERVER_LOG
+            exit 1
+        fi
+
+        echo "Test: $i, repository $MODEL_DIR" >>$CLIENT_LOG
+
+        set +e
+        python $BATCHER_TEST SequenceCorrIDBatcherTest.$i >>$CLIENT_LOG 2>&1
+        if [ $? -ne 0 ]; then
+            echo -e "\n***\n*** Test $i Failed\n***" >>$CLIENT_LOG
+            echo -e "\n***\n*** Test $i Failed\n***"
+            RET=1
+        else
+            check_test_results $TEST_RESULT_FILE 1
+            if [ $? -ne 0 ]; then
+                cat $CLIENT_LOG
+                echo -e "\n***\n*** Test Result Verification Failed\n***"
+                RET=1
+            fi
+        fi
+        set -e
+
+        unset TRITONSERVER_CORRID_DATA_TYPE
+        kill $SERVER_PID
+        wait $SERVER_PID
+    done
+done
+
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Test Passed\n***"
 else
diff --git a/qa/common/check_copyright.py b/qa/common/check_copyright.py
index 7f44426d45..95694dc460 100755
--- a/qa/common/check_copyright.py
+++ b/qa/common/check_copyright.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -60,6 +60,8 @@
     "docs/_static/.gitattributes",
     "docs/examples/model_repository",
     "docs/examples/jetson",
+    "docs/repositories.txt",
+    "docs/exclusions.txt",
     "docker",
     "qa/common/cuda_op_kernel.cu.cc.patch",
     "qa/ensemble_models/mix_platform_float32_float32_float32/output0_labels.txt",
@@ -191,6 +193,8 @@ def visit(path):
             prefix = "# "
         elif line.startswith("// "):
             prefix = "// "
+        elif line.startswith(".. "):
+            prefix = ".. "
" elif not line.startswith(COPYRIGHT_YEAR_RE[0]): print( "incorrect prefix for copyright line, allowed prefixes '# ' or '// ', for " diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models index 65ea20296f..70fdf9031f 100755 --- a/qa/common/gen_jetson_trt_models +++ b/qa/common/gen_jetson_trt_models @@ -34,7 +34,7 @@ # Make all generated files accessible outside of container umask 0000 # Set the version of the models -TRITON_VERSION=${TRITON_VERSION:=24.11} +TRITON_VERSION=${TRITON_VERSION:=24.12} # Set the CUDA device to use CUDA_DEVICE=${RUNNER_ID:=0} # Set TensorRT image diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops index 78de379e63..65a498bbdf 100755 --- a/qa/common/gen_qa_custom_ops +++ b/qa/common/gen_qa_custom_ops @@ -37,7 +37,7 @@ ## ############################################################################ -TRITON_VERSION=${TRITON_VERSION:=24.11} +TRITON_VERSION=${TRITON_VERSION:=24.12} NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION} TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3} PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3} @@ -156,7 +156,7 @@ echo -e "\033[34m[ INFO ] - Running: $TFSCRIPT \033[0m " docker run \ --rm \ - --label RUNNER_ID=$$RUNNER_ID \ + --label RUNNER_ID=$RUNNER_ID \ --label PROJECT_NAME=$PROJECT_NAME \ $DOCKER_GPU_ARGS \ -v $DOCKER_VOLUME:/mnt \ diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index c4bd68753c..93e4dc2dfd 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -48,7 +48,7 @@ ## ############################################################################ -TRITON_VERSION=${TRITON_VERSION:=24.11} +TRITON_VERSION=${TRITON_VERSION:=24.12} # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version ONNX_VERSION=1.16.1 @@ -286,8 +286,8 @@ python3 $VOLUME_SRCDIR/gen_qa_dyna_sequence_implicit_models.py --onnx --onnx_ops chmod -R 777 $VOLUME_DYNASEQIMPLICITDESTDIR python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$VOLUME_RAGGEDDESTDIR chmod -R 777 $VOLUME_RAGGEDDESTDIR -python3 $VOLUME_SRCDIR/gen_qa_ort_scalar_models.py --onnx_opset=$ONNX_OPSET --models_dir=$SCALARMODELSDESTDIR -chmod -R 777 $VOLUME_RAGGEDDESTDIR +python3 $VOLUME_SRCDIR/gen_qa_ort_scalar_models.py --onnx_opset=$ONNX_OPSET --models_dir=$VOLUME_SCALARMODELSDESTDIR +chmod -R 777 $VOLUME_SCALARMODELSDESTDIR EOF chmod a+x $ONNXSCRIPT diff --git a/qa/common/gen_qa_ort_scalar_models.py b/qa/common/gen_qa_ort_scalar_models.py index f2ddb35912..c00a97d5ed 100755 --- a/qa/common/gen_qa_ort_scalar_models.py +++ b/qa/common/gen_qa_ort_scalar_models.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -124,6 +124,10 @@ def create_onnx_modelconfig(models_dir, dtype, shape):
     )
 
     FLAGS = parser.parse_args()
+
+    if not FLAGS.models_dir:
+        raise Exception("--models_dir is required")
+
     create_onnx_modelfile(FLAGS.models_dir, shape=[1], dtype=np.float32)
     create_onnx_modelconfig(FLAGS.models_dir, shape=[1], dtype=np.float32)
     create_onnx_modelfile(FLAGS.models_dir, shape=[1, 1], dtype=np.float32)
diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py
index c8c01c1b8b..0e2e9cf698 100755
--- a/qa/common/gen_qa_trt_plugin_models.py
+++ b/qa/common/gen_qa_trt_plugin_models.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -27,6 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import argparse
+import ctypes
 import os
 
 import numpy as np
@@ -38,13 +39,13 @@
 TRT_LOGGER = trt.Logger()
 trt.init_libnvinfer_plugins(TRT_LOGGER, "")
-PLUGIN_CREATORS = trt.get_plugin_registry().plugin_creator_list
 
 
 def get_trt_plugin(plugin_name):
     plugin = None
     field_collection = None
-    for plugin_creator in PLUGIN_CREATORS:
+    plugin_creators = trt.get_plugin_registry().plugin_creator_list
+    for plugin_creator in plugin_creators:
         if (plugin_creator.name == "CustomHardmax") and (
             plugin_name == "CustomHardmax"
         ):
@@ -272,13 +273,37 @@
     )
 
 
+def windows_load_plugin_lib(win_plugin_dll):
+    if os.path.isfile(win_plugin_dll):
+        try:
+            ctypes.CDLL(win_plugin_dll, winmode=0)
+        except TypeError:
+            # winmode only introduced in python 3.8
+            ctypes.CDLL(win_plugin_dll)
+        return
+
+    raise IOError('Failed to load library: "{}".'.format(win_plugin_dll))
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--models_dir", type=str, required=True, help="Top-level model directory"
     )
+    parser.add_argument(
+        "--win_plugin_dll",
+        type=str,
+        required=False,
+        default="",
+        help="Path to Windows plugin .dll",
+    )
     FLAGS, unparsed = parser.parse_known_args()
 
     import test_util as tu
 
+    # Linux can leverage LD_PRELOAD. We must load the Windows plugin manually
+    # in order for it to be discovered in the registry.
+    if os.name == "nt":
+        windows_load_plugin_lib(FLAGS.win_plugin_dll)
+
     create_plugin_models(FLAGS.models_dir)
diff --git a/tools/add_copyright.py b/tools/add_copyright.py
index 7a3d0ac216..a51ffbfc64 100644
--- a/tools/add_copyright.py
+++ b/tools/add_copyright.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -246,6 +246,11 @@ def html_md(path):
     update_or_add_header(path, "")
 
 
+@register(has_ext([".rst"]))
+def rst(path):
+    update_or_add_header(path, prefix_lines(LICENSE_TEXT, ".. "))
+
+
 def add_copyrights(paths):
     for path in paths:
         for match, handler in FILE_TYPE_HANDLERS.items():
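+            # Each entry in FILE_TYPE_HANDLERS maps a predicate to a handler;
+            # a path satisfying has_ext([".rst"]) is routed to the rst handler
+            # above, which wraps LICENSE_TEXT in ".. "-prefixed comment lines.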