diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash
index 9c2de49d6..1a0978bdb 100644
--- a/.github/scripts/fbgemm_gpu_build.bash
+++ b/.github/scripts/fbgemm_gpu_build.bash
@@ -402,35 +402,3 @@ build_fbgemm_gpu_develop () {
 
   echo "[BUILD] FBGEMM-GPU build + develop completed"
 }
-
-install_fbgemm_gpu_package () {
-  local env_name="$1"
-  local package_name="$2"
-  if [ "$package_name" == "" ]; then
-    echo "Usage: ${FUNCNAME[0]} ENV_NAME WHEEL_NAME"
-    echo "Example(s):"
-    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu.whl     # Install the package (wheel)"
-    return 1
-  else
-    echo "################################################################################"
-    echo "# Install FBGEMM-GPU Package (Wheel)"
-    echo "#"
-    echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
-    echo "################################################################################"
-    echo ""
-  fi
-
-  echo "[INSTALL] Printing out FBGEMM-GPU wheel SHA: ${package_name}"
-  print_exec sha1sum "${package_name}"
-  print_exec sha256sum "${package_name}"
-  print_exec md5sum "${package_name}"
-
-  echo "[INSTALL] Installing FBGEMM-GPU wheel: ${package_name} ..."
-  (exec_with_retries conda run -n "${env_name}" python -m pip install "${package_name}") || return 1
-
-  echo "[INSTALL] Checking imports ..."
-  (test_python_import "${env_name}" fbgemm_gpu) || return 1
-  (test_python_import "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
-
-  echo "[INSTALL] Wheel installation completed ..."
-}
diff --git a/.github/scripts/fbgemm_gpu_install.bash b/.github/scripts/fbgemm_gpu_install.bash
new file mode 100644
index 000000000..9793bcf34
--- /dev/null
+++ b/.github/scripts/fbgemm_gpu_install.bash
@@ -0,0 +1,134 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# shellcheck disable=SC1091,SC2128
+. "$( dirname -- "$BASH_SOURCE"; )/utils_base.bash"
+
+################################################################################
+# FBGEMM_GPU Install Functions
+################################################################################
+
+# Install FBGEMM-GPU into a conda environment from a local wheel file, then check that it imports.
+install_fbgemm_gpu_wheel () {
+  local env_name="$1" wheel_path="$2" hash_tool py_module
+  if [ -z "$wheel_path" ]; then
+    echo "Usage: ${FUNCNAME[0]} ENV_NAME WHEEL_NAME"
+    echo "Example(s):"
+    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu.whl     # Install the package (wheel)"
+    return 1
+  fi
+  echo "################################################################################"
+  echo "# Install FBGEMM-GPU from Wheel"
+  echo "#"
+  echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
+  echo "################################################################################"
+  echo ""
+
+  echo "[INSTALL] Printing out FBGEMM-GPU wheel SHA: ${wheel_path}"
+  for hash_tool in sha1sum sha256sum md5sum; do
+    print_exec "${hash_tool}" "${wheel_path}"
+  done
+
+  echo "[INSTALL] Installing FBGEMM-GPU wheel: ${wheel_path} ..."
+  (exec_with_retries conda run -n "${env_name}" python -m pip install "${wheel_path}") || return 1
+
+  echo "[INSTALL] Checking imports ..."
+  for py_module in fbgemm_gpu fbgemm_gpu.split_embedding_codegen_lookup_invokers; do
+    (test_python_import "${env_name}" "${py_module}") || return 1
+  done
+
+  echo "[INSTALL] FBGEMM-GPU installation through wheel completed ..."
+}
+
+
+
+# Install FBGEMM-GPU into a conda environment from PyPI, then check that it imports.
+#
+# Arguments:
+#   ENV_NAME                    - conda environment to install into
+#   FBGEMM_GPU_VERSION          - "nightly", "latest", or an exact version (e.g. 0.5.0rc2)
+#   FBGEMM_GPU_VARIANT_TYPE     - "cuda" or "rocm"; any other non-empty value means "cpu"
+#   FBGEMM_GPU_VARIANT_VERSION  - optional CUDA/ROCm version; only used in log output
+# Returns 1 on bad usage, on ROCm (no PyPI package), or if the install/import check fails.
+install_fbgemm_gpu_pip () {
+  local env_name="$1"
+  local fbgemm_gpu_version="$2"
+  local fbgemm_gpu_variant_type="$3"
+  local fbgemm_gpu_variant_version="$4"
+  local fbgemm_gpu_variant fbgemm_gpu_package
+  if [ "$fbgemm_gpu_variant_type" == "" ]; then
+    echo "Usage: ${FUNCNAME[0]} ENV_NAME FBGEMM_GPU_VERSION FBGEMM_GPU_VARIANT_TYPE [FBGEMM_GPU_VARIANT_VERSION]"
+    echo "Example(s):"
+    echo "    ${FUNCNAME[0]} build_env 0.5.0rc2 cuda 12.1.1        # Install a specific version of the package (PyPI)"
+    return 1
+  else
+    echo "################################################################################"
+    echo "# Install FBGEMM-GPU Package from PIP"
+    echo "#"
+    echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
+    echo "################################################################################"
+    echo ""
+  fi
+
+  # Set the package variant
+  if [ "$fbgemm_gpu_variant_type" == "cuda" ]; then
+    # Extract the CUDA version or default to 11.8.0
+    local cuda_version="${fbgemm_gpu_variant_version:-11.8.0}"
+    # shellcheck disable=SC2206
+    local cuda_version_arr=(${cuda_version//./ })
+    # Convert, e.g. cuda 11.7.1 => cu117
+    fbgemm_gpu_variant="cu${cuda_version_arr[0]}${cuda_version_arr[1]}"
+  elif [ "$fbgemm_gpu_variant_type" == "rocm" ]; then
+    # Extract the ROCM version or default to 5.5.1
+    local rocm_version="${fbgemm_gpu_variant_version:-5.5.1}"
+    # shellcheck disable=SC2206
+    local rocm_version_arr=(${rocm_version//./ })
+    # Convert, e.g. rocm 5.5.1 => rocm5.5
+    fbgemm_gpu_variant="rocm${rocm_version_arr[0]}.${rocm_version_arr[1]}"
+  else
+    fbgemm_gpu_variant_type="cpu"
+    fbgemm_gpu_variant="cpu"
+  fi
+  echo "[INSTALL] Extracted FBGEMM-GPU variant: ${fbgemm_gpu_variant}"
+
+  # Set the PyPI package spec.  The variant type selects the package name (CPU builds
+  # carry a "-cpu" suffix); the CUDA version is not part of the PyPI package name.
+  if [ "$fbgemm_gpu_variant_type" == "cuda" ]; then
+    if [ "$fbgemm_gpu_version" == "nightly" ]; then
+      fbgemm_gpu_package="fbgemm-gpu-nightly"
+    elif [ "$fbgemm_gpu_version" == "latest" ]; then
+      fbgemm_gpu_package="fbgemm-gpu"
+    else
+      fbgemm_gpu_package="fbgemm-gpu==${fbgemm_gpu_version}"
+    fi
+
+  elif [ "$fbgemm_gpu_variant_type" == "rocm" ]; then
+    echo "ROCm is currently not supported in PyPI!"
+    return 1
+
+  else
+    if [ "$fbgemm_gpu_version" == "nightly" ]; then
+      fbgemm_gpu_package="fbgemm-gpu-nightly-cpu"
+    elif [ "$fbgemm_gpu_version" == "latest" ]; then
+      fbgemm_gpu_package="fbgemm-gpu-cpu"
+    else
+      fbgemm_gpu_package="fbgemm-gpu-cpu==${fbgemm_gpu_version}"
+    fi
+  fi
+
+  echo "[INSTALL] Attempting to install FBGEMM-GPU ${fbgemm_gpu_version}+${fbgemm_gpu_variant} through PIP ..."
+  echo "[INSTALL] Resolved PyPI package spec: ${fbgemm_gpu_package}"
+  # The spec is always a single word, so quote it; `python -m pip` matches install_fbgemm_gpu_wheel
+  (exec_with_retries conda run -n "${env_name}" python -m pip install "${fbgemm_gpu_package}") || return 1
+
+  echo "[INSTALL] Checking imports ..."
+  (test_python_import "${env_name}" fbgemm_gpu) || return 1
+  (test_python_import "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
+
+  echo "[INSTALL] FBGEMM-GPU installation through PIP completed ..."
+}
diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash
index 4e37a1a17..809c1c6be 100644
--- a/.github/scripts/fbgemm_gpu_test.bash
+++ b/.github/scripts/fbgemm_gpu_test.bash
@@ -170,8 +170,6 @@ test_setup_conda_environment () {
   else
     install_pytorch_pip       "${env_name}" "${pytorch_version}" "${pytorch_variant_type}" "${pytorch_variant_version}" || return 1
   fi
-
-  return "${env_name}"
 }
 
 test_fbgemm_gpu_build_and_install () {
@@ -184,7 +182,7 @@ test_fbgemm_gpu_build_and_install () {
   build_fbgemm_gpu_package    "${env_name}" release "${pytorch_variant_type}" || return 1
   # shellcheck disable=SC2164
   cd -
-  install_fbgemm_gpu_package  "${env_name}" fbgemm_gpu/dist/*.whl             || return 1
+  install_fbgemm_gpu_wheel  "${env_name}" fbgemm_gpu/dist/*.whl             || return 1
 
   cd fbgemm_gpu/test                        || return 1
   run_fbgemm_gpu_tests        "${env_name}" || return 1
diff --git a/.github/scripts/setup_env.bash b/.github/scripts/setup_env.bash
index ca9e25ae0..0bc8449c5 100755
--- a/.github/scripts/setup_env.bash
+++ b/.github/scripts/setup_env.bash
@@ -22,6 +22,8 @@
 # shellcheck disable=SC1091,SC2128
 . "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_docs.bash"
 # shellcheck disable=SC1091,SC2128
+. "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_install.bash"
+# shellcheck disable=SC1091,SC2128
 . "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_lint.bash"
 # shellcheck disable=SC1091,SC2128
 . "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_test.bash"
diff --git a/.github/scripts/utils_pytorch.bash b/.github/scripts/utils_pytorch.bash
index 846eb17ea..5c2f97fab 100644
--- a/.github/scripts/utils_pytorch.bash
+++ b/.github/scripts/utils_pytorch.bash
@@ -118,15 +118,15 @@ install_pytorch_pip () {
 
   # Set the package variant
   if [ "$pytorch_variant_type" == "cuda" ]; then
-    # Extract the CUDA version or default to 11.7.1
-    local cuda_version="${pytorch_variant_version:-11.7.1}"
+    # Extract the CUDA version or default to 11.8.0
+    local cuda_version="${pytorch_variant_version:-11.8.0}"
     # shellcheck disable=SC2206
     local cuda_version_arr=(${cuda_version//./ })
     # Convert, i.e. cuda 11.7.1 => cu117
     local pytorch_variant="cu${cuda_version_arr[0]}${cuda_version_arr[1]}"
   elif [ "$pytorch_variant_type" == "rocm" ]; then
-    # Extract the ROCM version or default to 5.3
-    local rocm_version="${pytorch_variant_version:-5.3}"
+    # Extract the ROCM version or default to 5.5.1
+    local rocm_version="${pytorch_variant_version:-5.5.1}"
     # shellcheck disable=SC2206
     local rocm_version_arr=(${rocm_version//./ })
     # Convert, i.e. rocm 5.5.1 => rocm5.5
diff --git a/.github/workflows/fbgemm_gpu_ci.yml b/.github/workflows/fbgemm_gpu_ci.yml
index ee3e46ccf..a1817a376 100644
--- a/.github/workflows/fbgemm_gpu_ci.yml
+++ b/.github/workflows/fbgemm_gpu_ci.yml
@@ -88,10 +88,10 @@ jobs:
     - name: Prepare FBGEMM_GPU Build
       run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
-    - name: Build FBGEMM_GPU-ROCM Nightly
+    - name: Build FBGEMM_GPU-ROCm Nightly
       run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_develop $BUILD_ENV rocm gfx90a
 
-    - name: Test FBGEMM_GPU-ROCM Nightly Installation
+    - name: Test FBGEMM_GPU-ROCm Nightly Installation
       timeout-minutes: 10
       run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm
 
@@ -154,10 +154,10 @@ jobs:
     - name: Prepare FBGEMM_GPU Build
       run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
-    - name: Build FBGEMM_GPU-ROCM Nightly
+    - name: Build FBGEMM_GPU-ROCm Nightly
       run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_develop $BUILD_ENV rocm
 
-    - name: Test FBGEMM_GPU-ROCM Nightly Installation
+    - name: Test FBGEMM_GPU-ROCm Nightly Installation
       timeout-minutes: 15
       run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm
 
diff --git a/.github/workflows/fbgemm_gpu_cpu_nightly.yml b/.github/workflows/fbgemm_gpu_cpu_nightly.yml
index 1f6547848..2b279bc5e 100644
--- a/.github/workflows/fbgemm_gpu_cpu_nightly.yml
+++ b/.github/workflows/fbgemm_gpu_cpu_nightly.yml
@@ -171,7 +171,7 @@ jobs:
       run: |
         . $PRELUDE
         pwd; ls -la .
-        install_fbgemm_gpu_package $BUILD_ENV *.whl
+        install_fbgemm_gpu_wheel $BUILD_ENV *.whl
 
     - name: Test with PyTest
       timeout-minutes: 10
diff --git a/.github/workflows/fbgemm_gpu_cpu_release.yml b/.github/workflows/fbgemm_gpu_cpu_release.yml
index 42193182a..6e11f7d02 100644
--- a/.github/workflows/fbgemm_gpu_cpu_release.yml
+++ b/.github/workflows/fbgemm_gpu_cpu_release.yml
@@ -158,7 +158,7 @@ jobs:
       run: |
         . $PRELUDE
         pwd; ls -la .
-        install_fbgemm_gpu_package $BUILD_ENV *.whl
+        install_fbgemm_gpu_wheel $BUILD_ENV *.whl
 
     - name: Test with PyTest
       timeout-minutes: 10
diff --git a/.github/workflows/fbgemm_gpu_cuda_nightly.yml b/.github/workflows/fbgemm_gpu_cuda_nightly.yml
index 30a87c49b..7dd98f208 100644
--- a/.github/workflows/fbgemm_gpu_cuda_nightly.yml
+++ b/.github/workflows/fbgemm_gpu_cuda_nightly.yml
@@ -177,7 +177,7 @@ jobs:
       run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
     - name: Install FBGEMM_GPU Nightly
-      run: . $PRELUDE; install_fbgemm_gpu_package $BUILD_ENV *.whl
+      run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
 
     - name: Test with PyTest
       timeout-minutes: 10
diff --git a/.github/workflows/fbgemm_gpu_cuda_release.yml b/.github/workflows/fbgemm_gpu_cuda_release.yml
index bb4ad8fa6..20c9b188f 100644
--- a/.github/workflows/fbgemm_gpu_cuda_release.yml
+++ b/.github/workflows/fbgemm_gpu_cuda_release.yml
@@ -27,6 +27,12 @@ on:
         type: boolean
         required: false
         default: false
+      cuda_version:
+        description: CUDA Version to Use for PyPI Publishing
+        type: choice
+        required: false
+        options: [ "11.8.0", "12.1.1" ]
+        default: "11.8.0"
 
 concurrency:
   # Cancel previous runs in the PR if a new commit is pushed
@@ -124,8 +130,6 @@ jobs:
         ]
         python-version: [ "3.8", "3.9", "3.10", "3.11" ]
         cuda-version: [ "11.8.0", "12.1.1" ]
-        # Specify exactly ONE CUDA version for artifact publish
-        cuda-version-publish: [ "11.8.0" ]
     needs: build_artifact
 
     steps:
@@ -164,14 +168,14 @@ jobs:
       run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
     - name: Install FBGEMM_GPU
-      run: . $PRELUDE; install_fbgemm_gpu_package $BUILD_ENV *.whl
+      run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
 
     - name: Test with PyTest
       timeout-minutes: 10
       run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV
 
     - name: Push FBGEMM_GPU Binary to PYPI
-      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish }}
+      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
       run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu-*.whl "$PYPI_TOKEN"
diff --git a/.github/workflows/fbgemm_gpu_pip.yml b/.github/workflows/fbgemm_gpu_pip.yml
new file mode 100644
index 000000000..d2903e6b9
--- /dev/null
+++ b/.github/workflows/fbgemm_gpu_pip.yml
@@ -0,0 +1,194 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+name: FBGEMM_GPU PIP Install + Test
+
+on:
+  # Manual Trigger
+  #
+  workflow_dispatch:
+    inputs:
+      fbgemm_gpu_version:
+        description: FBGEMM-GPU Version (e.g. '0.5.0rc1')
+        type: string
+        required: true
+      fbgemm_gpu_variant_type:
+        description: FBGEMM-GPU Variant
+        type: choice
+        required: true
+        options: [ "cpu", "cuda", "rocm" ]
+        default: "cpu"
+      fbgemm_gpu_variant_version:
+        description: FBGEMM-GPU Variant Version (e.g. 'CUDA 12.1.1' --> 12.1.1)
+        type: string
+        required: false
+
+
+jobs:
+  test_pypi_install_cpu:
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cpu' }}
+    runs-on: ${{ matrix.host-machine.instance }}
+    container:
+      image: amazonlinux:2023
+      options: --user root
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: test_install
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { instance: "linux.4xlarge" },
+          { instance: "linux.arm64.2xlarge" },
+        ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+
+    steps:
+    - name: Setup Build Container
+      run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which
+
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info; print_ec2_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install PyTorch-CPU
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV test cpu
+
+    - name: Install FBGEMM_GPU-CPU
+      run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm_gpu_version }} cpu
+
+    - name: Test with PyTest
+      timeout-minutes: 10
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
+
+
+  test_pypi_install_cuda:
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cuda' }}
+    runs-on: ${{ matrix.host-machine.instance }}
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: test_install
+      ENFORCE_NVIDIA_GPU: 1
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { instance: "linux.g5.4xlarge.nvidia.gpu" },
+        ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+        cuda-version: [ "11.8.0", "12.1.1" ]
+        # NOTE(review): this workflow has no publish step and nothing reads cuda-version-publish; likely safe to drop
+        cuda-version-publish: [ "11.8.0" ]
+
+    steps:
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+
+    - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
+      uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info; print_ec2_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install CUDA
+      run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+
+    - name: Install PyTorch-CUDA
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda ${{ matrix.cuda-version }}
+
+    - name: Install FBGEMM_GPU-CUDA
+      run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm_gpu_version }} cuda ${{ github.event.inputs.fbgemm_gpu_variant_version }}
+
+    - name: Test with PyTest
+      timeout-minutes: 10
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV
+
+
+  test_pypi_install_rocm:
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'rocm' }}
+    runs-on: ${{ matrix.host-machine.instance }}
+    container:
+      image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
+      options: --user root --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: test_install
+      ENFORCE_AMD_GPU: 1
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { instance: "rocm" },
+        ]
+        # ROCm machines are limited, so we only test against Python 3.10
+        python-version: [ "3.10" ]
+        rocm-version: [ "5.5.1", "5.6" ]
+
+    steps:
+    - name: Setup Build Container
+      run: |
+        apt update -y
+        apt install -y git wget
+        git config --global --add safe.directory '*'
+
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Free Disk Space
+      run: . $PRELUDE; free_disk_space
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install Build Tools
+      run: . $PRELUDE; install_build_tools $BUILD_ENV
+
+    - name: Install PyTorch-ROCm
+      run:  . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly rocm ${{ matrix.rocm-version }}
+
+    - name: Install FBGEMM_GPU-ROCm
+      run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm_gpu_version }} rocm ${{ github.event.inputs.fbgemm_gpu_variant_version }}
+
+    - name: Test FBGEMM_GPU-ROCm
+      timeout-minutes: 15
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm