Create a minimum stub for tfrt_gpu_client

PiperOrigin-RevId: 731878905
openxla · Feb 28, 2025 · 3f62e54 · 3f62e54
1 parent c4429a6
commit 3f62e54
Show file tree

Hide file tree

Showing 4 changed files with 375 additions and 2 deletions.
diff --git a/xla/pjrt/gpu/BUILD b/xla/pjrt/gpu/BUILD
@@ -11,7 +11,7 @@ load("//xla/tsl/platform:rules_cc.bzl", "cc_library")
 # Integrate with PJRT rather than the GPU client directly.
 package(
     # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
-    default_visibility = ["//visibility:private"],
+    # Widened from private so that packages nested under this one can depend
+    # on targets here without a per-target visibility entry.
+    default_visibility = [":__subpackages__"],
     licenses = ["notice"],
 )
 
@@ -276,7 +276,10 @@ cc_library(
     name = "gpu_topology",
     srcs = ["gpu_topology.cc"],
     hdrs = ["gpu_topology.h"],
-    visibility = internal_visibility(["//xla/pjrt/gpu:legacy_gpu_topology_users"]),
+    visibility = internal_visibility([
+        "//xla/pjrt/gpu:legacy_gpu_topology_users",
+        ":__subpackages__",
+    ]),
     deps = [
         ":gpu_topology_proto_cc",
         "@com_google_absl//absl/strings:string_view",

diff --git a/xla/pjrt/gpu/tfrt/BUILD b/xla/pjrt/gpu/tfrt/BUILD
@@ -0,0 +1,98 @@
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm")
+load("//xla/pjrt/gpu:package_groups.bzl", "xla_gpu_internal_packages")
+load("//xla/tsl:tsl.bzl", "internal_visibility")
+load("//xla/tsl/platform:rules_cc.bzl", "cc_library")
+
+# Integrate with PJRT rather than the GPU client directly.
+package(
+    # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
+    # Private by default; a target in this file must set its own `visibility`
+    # to be usable from other packages.
+    default_visibility = ["//visibility:private"],
+    licenses = ["notice"],
+)
+
+xla_gpu_internal_packages()
+
+# Library holding the TfrtGpuClient / TfrtGpuDevice classes and the
+# GetTfrtGpuClient() factory (tfrt_gpu_client.h / tfrt_gpu_client.cc).
+cc_library(
+    name = "tfrt_gpu_client",
+    srcs = ["tfrt_gpu_client.cc"],
+    hdrs = ["tfrt_gpu_client.h"],
+    # GOOGLE_CUDA=1 / TENSORFLOW_USE_ROCM=1 are contributed through the
+    # if_cuda() / if_rocm() helpers loaded at the top of this file.
+    defines = if_cuda(["GOOGLE_CUDA=1"]) + if_rocm(["TENSORFLOW_USE_ROCM=1"]),
+    # Overrides the package's private default; visibility is limited to the
+    # legacy_gpu_client_users group, wrapped by internal_visibility().
+    visibility = internal_visibility(["//xla/pjrt/gpu:legacy_gpu_client_users"]),
+    deps = [
+        "//xla:executable_run_options",
+        "//xla:literal",
+        "//xla:shape_tree",
+        "//xla:shape_util",
+        "//xla:status_macros",
+        "//xla:util",
+        "//xla:xla_data_proto_cc",
+        "//xla/client:executable_build_options",
+        "//xla/client:local_client",
+        "//xla/hlo/builder:xla_computation",
+        "//xla/hlo/ir:hlo",
+        "//xla/pjrt:host_memory_spaces",
+        "//xla/pjrt:mlir_to_hlo",
+        "//xla/pjrt:pjrt_client",
+        "//xla/pjrt:pjrt_common",
+        "//xla/pjrt:pjrt_compiler",
+        "//xla/pjrt:pjrt_device_description",
+        "//xla/pjrt:pjrt_executable",
+        "//xla/pjrt:pjrt_future",
+        "//xla/pjrt:pjrt_stream_executor_device_description",
+        "//xla/pjrt:semaphore",
+        "//xla/pjrt:transpose",
+        "//xla/pjrt:utils",
+        "//xla/pjrt:worker_thread",
+        "//xla/pjrt/gpu:gpu_helpers",
+        "//xla/pjrt/gpu:gpu_topology",
+        "//xla/pjrt/gpu:gpu_topology_proto_cc",
+        "//xla/pjrt/gpu:se_gpu_topology_description",
+        "//xla/service:compiler",
+        "//xla/service:computation_placer_hdr",
+        "//xla/service:executable",
+        "//xla/service:hlo_cost_analysis",
+        "//xla/service:hlo_proto_cc",
+        "//xla/service:maybe_owning_device_memory",
+        "//xla/service:shaped_buffer",
+        "//xla/service:transfer_manager",
+        "//xla/service/gpu:gpu_executable_run_options",
+        "//xla/stream_executor:device_description",
+        "//xla/stream_executor:device_memory",
+        "//xla/stream_executor:device_memory_allocator",
+        "//xla/stream_executor:platform",
+        "//xla/stream_executor:stream",
+        "//xla/stream_executor:stream_executor_h",
+        "//xla/stream_executor/integrations:tf_allocator_adapter",
+        "//xla/tsl/concurrency:async_value",
+        "//xla/tsl/concurrency:ref_count",
+        "//xla/tsl/framework:allocator",
+        "//xla/tsl/platform:env",
+        "//xla/tsl/platform:errors",
+        "//xla/tsl/platform:status",
+        "//xla/tsl/platform:statusor",
+        "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/container:inlined_vector",
+        "@com_google_absl//absl/functional:any_invocable",
+        "@com_google_absl//absl/log",
+        "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/strings:string_view",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/types:span",
+        "@eigen_archive//:eigen3",
+        "@llvm-project//mlir:IR",
+        "@tsl//tsl/platform:casts",
+        "@tsl//tsl/platform:fingerprint",
+        "@tsl//tsl/platform:protobuf",
+        "@tsl//tsl/profiler/lib:connected_traceme",
+        "@tsl//tsl/profiler/lib:context_types_hdrs",
+        "@tsl//tsl/profiler/lib:traceme",
+    ],
+)
diff --git a/xla/pjrt/gpu/tfrt/tfrt_gpu_client.cc b/xla/pjrt/gpu/tfrt/tfrt_gpu_client.cc
@@ -0,0 +1,111 @@
+/* Copyright 2025 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/pjrt/gpu/tfrt/tfrt_gpu_client.h"
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+#include "xla/client/local_client.h"
+#include "xla/pjrt/gpu/gpu_helpers.h"
+#include "xla/pjrt/gpu/gpu_topology.h"
+#include "xla/pjrt/gpu/gpu_topology.pb.h"
+#include "xla/pjrt/pjrt_client.h"
+#include "xla/pjrt/pjrt_compiler.h"
+#include "xla/stream_executor/device_description.h"
+#include "xla/stream_executor/platform.h"
+#include "xla/stream_executor/stream_executor.h"
+#include "xla/tsl/framework/allocator.h"
+#include "xla/tsl/platform/statusor.h"
+#include "xla/xla_data.pb.h"
+
+namespace xla {
+
+// Builds one TfrtGpuDevice per StreamExecutor in `xla_client`'s backend.
+//
+// Devices are numbered 0..N-1 in the order the backend lists its executors;
+// that same index is used as the device id, the local device id and the local
+// hardware id. Every device receives its own allocator from
+// CreateBFCAllocator (memory fraction 0.9, preallocation enabled).
+//
+// Returns the devices in executor order, or the first error produced by
+// allocator creation or by the platform's device-description query.
+//
+// NOTE(review): the device description is queried with the loop index rather
+// than with an ordinal read from the executor itself -- confirm the two agree
+// when the backend exposes only a subset of the machine's devices.
+static absl::StatusOr<std::vector<std::unique_ptr<TfrtGpuDevice>>>
+GetTfrtGpuDevices(LocalClient* xla_client) {
+  const auto& executors = xla_client->backend().stream_executors();
+
+  std::vector<std::unique_ptr<TfrtGpuDevice>> devices;
+  // Exactly one device is appended per executor, so allocate once up front.
+  devices.reserve(executors.size());
+
+  int device_index = 0;
+  for (se::StreamExecutor* executor : executors) {
+    // TODO(b/382117736): allow GPU allocator parameters to be configurable.
+    TF_ASSIGN_OR_RETURN(auto allocator,
+                        CreateBFCAllocator(executor, /*memory_fraction=*/0.9,
+                                           /*preallocate=*/true, std::nullopt));
+
+    TfrtGpuDevice::Options options;
+    options.id = device_index;
+    options.local_device_id = PjRtLocalDeviceId(device_index);
+    options.local_hardware_id = PjRtLocalHardwareId(device_index);
+    options.executor = executor;
+    options.allocator = std::move(allocator);
+    options.stream_capacity = 4;
+    options.max_inflight_computations = 1;
+
+    // The platform version string is taken from the device description's name.
+    const se::Platform* platform = executor->GetPlatform();
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<xla::se::DeviceDescription> desc,
+        platform->DescriptionForDevice(options.local_hardware_id.value()));
+    options.platform_version = desc->name();
+
+    devices.push_back(std::make_unique<TfrtGpuDevice>(std::move(options)));
+    ++device_index;
+  }
+
+  // The return type is StatusOr<vector>, not the vector itself, and the
+  // vector is move-only; the explicit move makes the conversion a move.
+  return std::move(devices);
+}
+
+// Creates a TfrtGpuClient and returns it as a PjRtClient.
+//
+// Steps, in order:
+//   1. Obtain the LocalClient for `options.platform_name`, restricted to
+//      `options.allowed_devices`.
+//   2. Call EnablePeerAccess() on the backend's stream executors.
+//   3. Create a host memory allocator from the first executor; it stays null
+//      when the backend has no executors.
+//   4. Build one TfrtGpuDevice per executor.
+//   5. Describe the devices in a GpuTopologyProto with a single slice and a
+//      single host, and hand everything to the TfrtGpuClient constructor with
+//      process index 0.
+//
+// Returns the first error from client lookup, host allocator creation or
+// device creation.
+//
+// NOTE(review): the topology's platform_version is filled from
+// device->device_kind(), which TfrtGpuDevice does not override in its header
+// -- confirm the inherited implementation is usable for these devices.
+absl::StatusOr<std::unique_ptr<PjRtClient>> GetTfrtGpuClient(
+    TfrtGpuClient::Options options) {
+  TF_ASSIGN_OR_RETURN(
+      LocalClient * xla_client,
+      GetGpuXlaClient(options.platform_name, options.allowed_devices));
+
+  // Looked up once and reused below instead of re-walking the backend.
+  const auto& executors = xla_client->backend().stream_executors();
+  EnablePeerAccess(executors);
+
+  std::unique_ptr<tsl::Allocator> host_memory_allocator;
+  if (!executors.empty()) {
+    TF_ASSIGN_OR_RETURN(host_memory_allocator,
+                        GetGpuHostAllocator(executors.front()));
+  }
+
+  TF_ASSIGN_OR_RETURN(std::vector<std::unique_ptr<TfrtGpuDevice>> devices,
+                      GetTfrtGpuDevices(xla_client));
+
+  GpuTopologyProto gpu_topology_proto;
+  for (const auto& device : devices) {
+    // Only the first non-empty device kind is recorded as platform version.
+    if (gpu_topology_proto.platform_version().empty()) {
+      gpu_topology_proto.set_platform_version(
+          std::string(device->device_kind()));
+    }
+    gpu_topology_proto.add_device_ids(device->id());
+  }
+
+  // TODO(b/382117736): Support multi-host
+  gpu_topology_proto.set_num_slices(1);
+  gpu_topology_proto.set_num_hosts_per_slice(1);
+  // size() is unsigned; spell out the narrowing to the proto's integer field.
+  gpu_topology_proto.set_num_devices_per_host(
+      static_cast<int>(devices.size()));
+
+  auto gpu_topology = std::shared_ptr<const GpuTopology>(
+      GpuTopology::FromProto(gpu_topology_proto));
+
+  // `gpu_topology` is not used again, so move it rather than copying the
+  // shared_ptr (a copy costs an atomic reference-count update).
+  return std::unique_ptr<PjRtClient>(std::make_unique<TfrtGpuClient>(
+      /*process_index=*/0, xla_client, std::move(devices),
+      std::move(host_memory_allocator), std::move(gpu_topology)));
+}
+
+}  // namespace xla
diff --git a/xla/pjrt/gpu/tfrt/tfrt_gpu_client.h b/xla/pjrt/gpu/tfrt/tfrt_gpu_client.h
@@ -0,0 +1,161 @@
+/* Copyright 2025 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_PJRT_GPU_TFRT_TFRT_GPU_CLIENT_H_
+#define XLA_PJRT_GPU_TFRT_TFRT_GPU_CLIENT_H_
+
+#include <memory>
+#include <optional>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+#include "xla/client/local_client.h"
+#include "xla/literal.h"
+#include "xla/pjrt/gpu/gpu_topology.h"
+#include "xla/pjrt/pjrt_client.h"
+#include "xla/pjrt/pjrt_common.h"
+#include "xla/pjrt/pjrt_compiler.h"
+#include "xla/pjrt/pjrt_future.h"
+#include "xla/service/hlo.pb.h"
+#include "xla/stream_executor/platform.h"
+#include "xla/stream_executor/stream.h"
+#include "xla/stream_executor/stream_executor.h"
+#include "xla/tsl/framework/allocator.h"
+#include "xla/xla_data.pb.h"
+#include "tsl/platform/fingerprint.h"
+
+namespace xla {
+
+// PjRtDevice implementation whose instances are handed to TfrtGpuClient's
+// constructor. A device is constructed from an Options bundle.
+class TfrtGpuDevice final : public PjRtDevice {
+ public:
+  // Construction parameters; passed to the constructor by rvalue reference.
+  struct Options {
+    int id;
+    PjRtLocalDeviceId local_device_id;
+    PjRtLocalHardwareId local_hardware_id;
+    se::StreamExecutor* executor;
+    std::unique_ptr<tsl::Allocator> allocator;
+    int stream_capacity;
+    int max_inflight_computations;
+    std::string platform_version;
+  };
+
+  explicit TfrtGpuDevice(Options&& options);
+
+  PjRtClient* client() const override { return client_; }
+
+  // True when this device's process index equals that of its client.
+  // Dereferences client(), so the client pointer must be non-null.
+  bool IsAddressable() const override {
+    return process_index() == client()->process_index();
+  }
+
+  int id() const override { return id_; }
+
+  // Always 0 in this implementation.
+  int process_index() const override { return 0; }
+
+  PjRtLocalDeviceId local_device_id() const override {
+    return local_device_id_;
+  }
+
+  // Used as `device_ordinal`.
+  PjRtLocalHardwareId local_hardware_id() const override {
+    return local_hardware_id_;
+  }
+
+  absl::string_view DebugString() const override;
+
+  absl::string_view ToString() const override;
+
+  absl::Status TransferToInfeed(const LiteralSlice& literal) override;
+
+  absl::Status TransferFromOutfeed(MutableBorrowingLiteral literal) override;
+
+  absl::Span<PjRtMemorySpace* const> memory_spaces() const override;
+
+  absl::StatusOr<PjRtMemorySpace*> default_memory_space() const override;
+
+  // Always returns nullptr; `description` is ignored.
+  std::unique_ptr<ScopedAsyncTrackingEvent> CreateAsyncTrackingEvent(
+      absl::string_view description) const override {
+    return nullptr;
+  }
+
+ private:
+  int id_;
+  // Defaults to null.
+  // NOTE(review): Options carries no client and no setter is declared in this
+  // class -- confirm how this is populated before IsAddressable() is called.
+  PjRtClient* client_ = nullptr;
+  PjRtLocalDeviceId local_device_id_;
+  PjRtLocalHardwareId local_hardware_id_;
+};
+
+// PjRtClient implementation for GPU devices represented by TfrtGpuDevice.
+// GetTfrtGpuClient() (declared after this class) creates one from Options.
+class TfrtGpuClient final : public PjRtClient {
+ public:
+  // Selection parameters consumed by GetTfrtGpuClient(). Both fields are
+  // optional.
+  struct Options {
+    std::optional<std::set<int>> allowed_devices;
+    std::optional<std::string> platform_name;
+  };
+
+  // Takes the device list, host allocator and topology by value;
+  // `xla_client` is passed as a raw pointer.
+  TfrtGpuClient(int process_index, xla::LocalClient* xla_client,
+                std::vector<std::unique_ptr<TfrtGpuDevice>> devices,
+                std::unique_ptr<tsl::Allocator> host_memory_allocator,
+                std::shared_ptr<const GpuTopology> gpu_topology);
+
+  int process_index() const override { return process_index_; }
+
+  // Number of entries in devices().
+  int device_count() const override { return devices_.size(); }
+
+  // Number of entries in addressable_devices().
+  int addressable_device_count() const override {
+    return addressable_devices_.size();
+  }
+
+  absl::Span<PjRtDevice* const> devices() const override { return devices_; }
+
+  absl::Span<PjRtDevice* const> addressable_devices() const override {
+    return addressable_devices_;
+  }
+
+  absl::Span<PjRtMemorySpace* const> memory_spaces() const override;
+
+  // The platform id and name are derived from xla::CudaName() only; nothing
+  // stored in this object is consulted.
+  PjRtPlatformId platform_id() const override {
+    // TODO(b/382117736): Add support for ROCM and SYCL.
+    return tsl::Fingerprint64(xla::CudaName());
+  }
+
+  absl::string_view platform_name() const override { return xla::CudaName(); }
+
+  absl::string_view platform_version() const override;
+
+ private:
+  int process_index_;
+
+  se::Platform* platform_;
+  xla::LocalClient* xla_client_;
+
+  // Backing storage for the span returned by devices().
+  std::vector<PjRtDevice*> devices_;
+
+  // Addressable devices indexed by core_id.
+  std::vector<PjRtDevice*> addressable_devices_;
+};
+
+// Factory for TfrtGpuClient. Returns the new client as a PjRtClient, or an
+// error status if it could not be created. `options` is taken by value.
+absl::StatusOr<std::unique_ptr<PjRtClient>> GetTfrtGpuClient(
+    TfrtGpuClient::Options options);
+
+}  // namespace xla
+
+#endif  // XLA_PJRT_GPU_TFRT_TFRT_GPU_CLIENT_H_