From 5dfebe1c486d86fed0da0b030589306385539bb3 Mon Sep 17 00:00:00 2001
From: Izzy Putterman <iputterman@nvidia.com>
Date: Thu, 7 Nov 2024 11:06:03 -0800
Subject: [PATCH] PA: Fixed scheduler manager (#88)

* Do I know what I am doing?

* compiles apparently

* Move to request_rate

* Fix small bugs

* more small fixes

* Maybe fix small issue

* Actually initialize empty

* Convert to const

* Drop old files

* Force request-count to equal schedule length

* Copy from GenAI branch

* Deformat

* Reorganize arg order

Fix pre-commit errors

* Fix errors, modify design

* fix pre-commit

Fix comments

---------

Co-authored-by: lkomali <lkomali@nvidia.com>
Co-authored-by: Harshini Komali <157742537+lkomali@users.noreply.github.com>
---
 src/CMakeLists.txt                     |   2 +
 src/command_line_parser.cc             |  29 +++++-
 src/command_line_parser.h              |   2 +
 src/custom_request_schedule_manager.cc | 121 +++++++++++++++++++++++++
 src/custom_request_schedule_manager.h  | 109 ++++++++++++++++++++++
 src/inference_profiler.cc              |   5 +-
 src/inference_profiler.h               |   6 +-
 src/perf_analyzer.cc                   |  31 ++++---
 src/request_rate_manager.cc            |   6 +-
 src/request_rate_manager.h             |   9 +-
 10 files changed, 292 insertions(+), 28 deletions(-)
 create mode 100644 src/custom_request_schedule_manager.cc
 create mode 100644 src/custom_request_schedule_manager.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index caaae35c..1d51c164 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -130,6 +130,7 @@ set(
   data_loader.cc
   concurrency_manager.cc
   request_rate_manager.cc
+  custom_request_schedule_manager.cc
   load_worker.cc
   concurrency_worker.cc
   request_rate_worker.cc
@@ -160,6 +161,7 @@ set(
   data_loader.h
   concurrency_manager.h
   request_rate_manager.h
+  custom_request_schedule_manager.h
   custom_load_manager.h
   iworker.h
   load_worker.h
diff --git a/src/command_line_parser.cc b/src/command_line_parser.cc
index 0d07c42a..e0de94ff 100644
--- a/src/command_line_parser.cc
+++ b/src/command_line_parser.cc
@@ -53,7 +53,7 @@ SplitString(const std::string& str, const std::string& delimiter = ":")
   std::vector<std::string> substrs;
   size_t pos = 0;
   while (pos != std::string::npos) {
-    size_t colon_pos = str.find(":", pos);
+    size_t colon_pos = str.find(delimiter, pos);
     substrs.push_back(str.substr(pos, colon_pos - pos));
     if (colon_pos == std::string::npos) {
       pos = colon_pos;
@@ -908,6 +908,7 @@ CLParser::ParseCommandLine(int argc, char** argv)
       {"endpoint", required_argument, 0, long_option_idx_base + 61},
       {"request-count", required_argument, 0, long_option_idx_base + 62},
       {"warmup-request-count", required_argument, 0, long_option_idx_base + 63},
+      {"schedule", required_argument, 0, long_option_idx_base + 64},
       {0, 0, 0, 0}};
 
   // Parse commandline...
@@ -1647,7 +1648,9 @@ CLParser::ParseCommandLine(int argc, char** argv)
           if (std::stoi(optarg) < 0) {
             Usage("Failed to parse --request-count. The value must be > 0.");
           }
-          params_->request_count = std::stoi(optarg);
+          if (params_->request_count == 0) {
+            params_->request_count = std::stoi(optarg);
+          }
           break;
         }
         case long_option_idx_base + 63: {
@@ -1659,6 +1662,17 @@ CLParser::ParseCommandLine(int argc, char** argv)
           params_->warmup_request_count = std::stoi(optarg);
           break;
         }
+        case long_option_idx_base + 64: {
+          std::vector<float> schedule;
+          std::string arg = optarg;
+          std::vector<std::string> float_strings = SplitString(optarg, ",");
+          for (const std::string& str : float_strings) {
+            schedule.push_back(std::stof(str));
+          }
+          params_->schedule = schedule;
+          params_->request_count = schedule.size();
+          break;
+        }
         case 'v':
           params_->extra_verbose = params_->verbose;
           params_->verbose = true;
@@ -1977,9 +1991,13 @@ CLParser::VerifyOptions()
       Usage(
           "perf_analyzer supports only grpc protocol for TensorFlow Serving.");
     } else if (params_->streaming) {
-      Usage("perf_analyzer does not support streaming for TensorFlow Serving.");
+      Usage(
+          "perf_analyzer does not support streaming for TensorFlow "
+          "Serving.");
     } else if (params_->async) {
-      Usage("perf_analyzer does not support async API for TensorFlow Serving.");
+      Usage(
+          "perf_analyzer does not support async API for TensorFlow "
+          "Serving.");
     } else if (!params_->using_batch_size) {
       params_->batch_size = 0;
     }
@@ -2008,7 +2026,8 @@ CLParser::VerifyOptions()
     if (params_->async && params_->streaming &&
         params_->shared_memory_type != SharedMemoryType::NO_SHARED_MEMORY) {
       Usage(
-          "Cannot use --shared-memory=system or --shared-memory=cuda with "
+          "Cannot use --shared-memory=system or --shared-memory=cuda "
+          "with "
           "--service-kind=triton_c_api and --async and --streaming.");
     }
 
diff --git a/src/command_line_parser.h b/src/command_line_parser.h
index 7c8f4977..e0d86cc2 100644
--- a/src/command_line_parser.h
+++ b/src/command_line_parser.h
@@ -157,6 +157,8 @@ struct PerfAnalyzerParameters {
   Range<uint64_t> periodic_concurrency_range{1, 1, 1};
   uint64_t request_period{10};
   size_t warmup_request_count{0};
+
+  std::vector<float> schedule{};
 };
 
 using PAParamsPtr = std::shared_ptr<PerfAnalyzerParameters>;
diff --git a/src/custom_request_schedule_manager.cc b/src/custom_request_schedule_manager.cc
new file mode 100644
index 00000000..2ebf9538
--- /dev/null
+++ b/src/custom_request_schedule_manager.cc
@@ -0,0 +1,121 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "custom_request_schedule_manager.h"
+
+namespace triton::perfanalyzer {
+
+cb::Error
+CustomRequestScheduleManager::Create(
+    const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
+    const std::shared_ptr<cb::ClientBackendFactory>& factory,
+    std::unique_ptr<LoadManager>* manager)
+{
+  std::unique_ptr<CustomRequestScheduleManager> local_manager(
+      new CustomRequestScheduleManager(params, parser, factory));
+
+  *manager = std::move(local_manager);
+
+  return cb::Error::Success;
+}
+
+CustomRequestScheduleManager::CustomRequestScheduleManager(
+    const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
+    const std::shared_ptr<cb::ClientBackendFactory>& factory)
+    : RequestRateManager(
+          params->async, params->streaming, Distribution::CUSTOM,
+          params->batch_size, params->measurement_window_ms, params->max_trials,
+          params->max_threads, params->num_of_sequences,
+          params->shared_memory_type, params->output_shm_size,
+          params->serial_sequences, parser, factory,
+          params->request_parameters),
+      schedule_(params->schedule)
+{
+}
+
+cb::Error
+CustomRequestScheduleManager::PerformWarmup(
+    double request_rate, size_t warmup_request_count)
+{
+  if (warmup_request_count == 0) {
+    return cb::Error::Success;
+  }
+  RETURN_IF_ERROR(ChangeRequestRate(request_rate, warmup_request_count));
+  WaitForWarmupAndCleanup();
+  return cb::Error::Success;
+}
+
+cb::Error
+CustomRequestScheduleManager::ChangeRequestRate(
+    const double request_rate, const size_t request_count)
+{
+  PauseWorkers();
+  ConfigureThreads(request_count);
+  GenerateSchedule(request_rate, schedule_);
+  ResumeWorkers();
+
+  return cb::Error::Success;
+}
+
+void
+CustomRequestScheduleManager::GenerateSchedule(
+    const double request_rate, const std::vector<float>& schedule)
+{
+  std::vector<float> scaled_schedule;
+  scaled_schedule.reserve(schedule.size());
+  if (schedule.size() > 0) {
+    for (const auto& value : schedule) {
+      scaled_schedule.push_back(value / static_cast<float>(request_rate));
+    }
+  }
+  auto worker_schedules = CreateWorkerSchedules(schedule);
+  GiveSchedulesToWorkers(worker_schedules);
+}
+
+std::vector<RateSchedulePtr_t>
+CustomRequestScheduleManager::CreateWorkerSchedules(
+    const std::vector<float>& schedule)
+{
+  std::vector<RateSchedulePtr_t> worker_schedules =
+      CreateEmptyWorkerSchedules();
+  std::vector<size_t> thread_ids{CalculateThreadIds()};
+  std::chrono::nanoseconds next_timestamp(0);
+  size_t thread_id_index = 0;
+  size_t worker_index = 0;
+
+  for (const float& val : schedule) {
+    next_timestamp = std::chrono::duration_cast<std::chrono::nanoseconds>(
+        std::chrono::duration<float>(val));
+    worker_index = thread_ids[thread_id_index];
+    thread_id_index = ++thread_id_index % thread_ids.size();
+    worker_schedules[worker_index]->intervals.emplace_back(next_timestamp);
+  }
+  SetScheduleDurations(worker_schedules);
+
+  return worker_schedules;
+}
+
+}  // namespace triton::perfanalyzer
diff --git a/src/custom_request_schedule_manager.h b/src/custom_request_schedule_manager.h
new file mode 100644
index 00000000..38d25c82
--- /dev/null
+++ b/src/custom_request_schedule_manager.h
@@ -0,0 +1,109 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#pragma once
+
+#include "command_line_parser.h"
+#include "load_manager.h"
+#include "request_rate_manager.h"
+
+namespace triton::perfanalyzer {
+
+//==============================================================================
+/// CustomRequestScheduleManager is a helper class to send inference requests to
+/// inference server in accordance with the schedule set by the user.
+///
+/// Detail:
+/// An instance of this load manager will be created at the beginning of the
+/// perf analyzer and it will be used to schedule to send requests at that
+/// particular second defined by the user. The particular seconds at which a
+/// request should be sent can be set by the user using the `schedule` option.
+/// For example, if the `schedule` is set to 1,2,4,5,6.5,
+/// CustomRequestScheduleManager sends request at 1st second, 2nd second, 4th
+/// second and so on.
+///
+
+class CustomRequestScheduleManager : public RequestRateManager {
+ public:
+  ~CustomRequestScheduleManager() = default;
+
+  /// Creates an object of CustomRequestScheduleManager
+  /// \param params A PAParamsPtr (std::shared_ptr<PerfAnalyzerParameters>) that
+  /// holds configuration parameters to create CustomRequestScheduleManager
+  /// object
+  ///
+  static cb::Error Create(
+      const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
+      const std::shared_ptr<cb::ClientBackendFactory>& factory,
+      std::unique_ptr<LoadManager>* manager);
+
+  /// Performs warmup for benchmarking by sending a fixed number of requests
+  /// according to the specified request rate
+  /// \param request_rate The rate at which requests must be issued to the
+  /// server \param warmup_request_count The number of warmup requests to send
+  /// \return cb::Error object indicating success or failure
+  cb::Error PerformWarmup(
+      double request_rate, size_t warmup_request_count) override;
+
+  /// Adjusts the rate of issuing requests to be the same as 'request_rate'
+  /// \param request_rate The rate at which requests must be issued to the
+  /// server \param request_count The number of requests to generate when
+  /// profiling \return cb::Error object indicating success or failure
+  cb::Error ChangeRequestRate(
+      const double request_rate, const size_t request_count) override;
+
+
+ protected:
+  /// Constructor for CustomRequestScheduleManager
+  ///
+  /// Initializes a CustomRequestScheduleManager instance using a PAParamsPtr
+  /// object that contains all necessary parameters for request scheduling.
+  ///
+  /// \param params A PAParamsPtr (std::shared_ptr<PerfAnalyzerParameters>) that
+  /// holds configuration parameters to create CustomRequestScheduleManager
+  /// object
+  ///
+  CustomRequestScheduleManager(
+      const pa::PAParamsPtr& params, const std::shared_ptr<ModelParser>& parser,
+      const std::shared_ptr<cb::ClientBackendFactory>& factory);
+
+  /// Generates and updates the request schedule as per the given request rate
+  /// and schedule \param request_rate The request rate to use for new schedule
+  /// \param schedule The vector containing the schedule for requests
+  void GenerateSchedule(
+      const double request_rate, const std::vector<float>& schedule);
+
+  /// Creates worker schedules based on the provided schedule
+  /// \param duration The maximum duration for the schedule
+  /// \param schedule The vector containing the schedule for requests
+  /// \return A vector of RateSchedulePtr_t representing the worker schedules
+  std::vector<RateSchedulePtr_t> CreateWorkerSchedules(
+      const std::vector<float>& schedule);
+
+  /// The vector containing the schedule for requests
+  std::vector<float> schedule_;
+};
+
+}  // namespace triton::perfanalyzer
diff --git a/src/inference_profiler.cc b/src/inference_profiler.cc
index 9f425e36..bb51e201 100644
--- a/src/inference_profiler.cc
+++ b/src/inference_profiler.cc
@@ -40,7 +40,7 @@
 #include "constants.h"
 #include "doctest.h"
 
-namespace triton { namespace perfanalyzer {
+namespace triton::perfanalyzer {
 cb::Error
 ReportPrometheusMetrics(const Metrics& metrics)
 {
@@ -622,6 +622,7 @@ InferenceProfiler::Profile(
   is_stable = false;
   meets_threshold = true;
 
+
   RETURN_IF_ERROR(dynamic_cast<RequestRateManager*>(manager_.get())
                       ->PerformWarmup(request_rate, warmup_request_count));
   RETURN_IF_ERROR(dynamic_cast<RequestRateManager*>(manager_.get())
@@ -1872,4 +1873,4 @@ InferenceProfiler::MergeMetrics(
   return cb::Error::Success;
 }
 
-}}  // namespace triton::perfanalyzer
+}  // namespace triton::perfanalyzer
diff --git a/src/inference_profiler.h b/src/inference_profiler.h
index 206a8449..d9c9e595 100644
--- a/src/inference_profiler.h
+++ b/src/inference_profiler.h
@@ -39,6 +39,7 @@
 #include "concurrency_manager.h"
 #include "constants.h"
 #include "custom_load_manager.h"
+#include "custom_request_schedule_manager.h"
 #include "metrics.h"
 #include "metrics_manager.h"
 #include "model_parser.h"
@@ -47,7 +48,7 @@
 #include "profile_data_collector.h"
 #include "request_rate_manager.h"
 
-namespace triton { namespace perfanalyzer {
+namespace triton::perfanalyzer {
 
 #ifndef DOCTEST_CONFIG_DISABLE
 class NaggyMockInferenceProfiler;
@@ -443,6 +444,7 @@ class InferenceProfiler {
       std::vector<PerfStatus>& perf_statuses, bool& meets_threshold,
       bool& is_stable);
 
+
   /// A helper function for profiling functions.
   /// \param status_summary Returns the summary of the measurement.
   /// \param request_count The number of requests to generate when profiling. If
@@ -829,4 +831,4 @@ class InferenceProfiler {
 #endif
 };
 
-}}  // namespace triton::perfanalyzer
+}  // namespace triton::perfanalyzer
diff --git a/src/perf_analyzer.cc b/src/perf_analyzer.cc
index fd3e8ecc..bd1015b2 100644
--- a/src/perf_analyzer.cc
+++ b/src/perf_analyzer.cc
@@ -26,6 +26,7 @@
 
 #include "perf_analyzer.h"
 
+#include "custom_request_schedule_manager.h"
 #include "perf_analyzer_exception.h"
 #include "periodic_concurrency_manager.h"
 #include "report_writer.h"
@@ -33,7 +34,7 @@
 
 namespace pa = triton::perfanalyzer;
 
-namespace triton { namespace perfanalyzer {
+namespace triton::perfanalyzer {
 
 volatile bool early_exit = false;
 
@@ -52,7 +53,7 @@ SignalHandler(int signum)
     exit(0);
   }
 }
-}}  // namespace triton::perfanalyzer
+}  // namespace triton::perfanalyzer
 
 PerfAnalyzer::PerfAnalyzer(pa::PAParamsPtr params) : params_(params)
 {
@@ -238,15 +239,23 @@ PerfAnalyzer::CreateAnalyzerObjects()
           << "may occur." << std::endl;
       throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
     }
-    FAIL_IF_ERR(
-        pa::RequestRateManager::Create(
-            params_->async, params_->streaming, params_->measurement_window_ms,
-            params_->max_trials, params_->request_distribution,
-            params_->batch_size, params_->max_threads,
-            params_->num_of_sequences, params_->shared_memory_type,
-            params_->output_shm_size, params_->serial_sequences, parser_,
-            factory, &manager, params_->request_parameters),
-        "failed to create request rate manager");
+    if (!params_->schedule.empty()) {
+      FAIL_IF_ERR(
+          pa::CustomRequestScheduleManager::Create(
+              params_, parser_, factory, &manager),
+          "failed to create custom request schedule manager");
+    } else {
+      FAIL_IF_ERR(
+          pa::RequestRateManager::Create(
+              params_->async, params_->streaming,
+              params_->measurement_window_ms, params_->max_trials,
+              params_->request_distribution, params_->batch_size,
+              params_->max_threads, params_->num_of_sequences,
+              params_->shared_memory_type, params_->output_shm_size,
+              params_->serial_sequences, parser_, factory, &manager,
+              params_->request_parameters),
+          "failed to create request rate manager");
+    }
 
   } else {
     if ((params_->sequence_id_range != 0) &&
diff --git a/src/request_rate_manager.cc b/src/request_rate_manager.cc
index 454b11d9..118c4312 100644
--- a/src/request_rate_manager.cc
+++ b/src/request_rate_manager.cc
@@ -26,7 +26,7 @@
 
 #include "request_rate_manager.h"
 
-namespace triton { namespace perfanalyzer {
+namespace triton::perfanalyzer {
 
 RequestRateManager::~RequestRateManager()
 {
@@ -106,7 +106,6 @@ RequestRateManager::ChangeRequestRate(
 {
   PauseWorkers();
   ConfigureThreads(request_count);
-  // Can safely update the schedule
   GenerateSchedule(request_rate);
   ResumeWorkers();
 
@@ -153,7 +152,6 @@ RequestRateManager::CreateWorkerSchedules(
   size_t thread_id_index = 0;
   size_t worker_index = 0;
 
-
   // Generate schedule until we hit max_duration, but also make sure that all
   // worker schedules follow the thread id distribution
   //
@@ -314,4 +312,4 @@ RequestRateManager::DetermineNumThreads()
 }
 
 
-}}  // namespace triton::perfanalyzer
+}  // namespace triton::perfanalyzer
diff --git a/src/request_rate_manager.h b/src/request_rate_manager.h
index 6d5904a3..b8bba685 100644
--- a/src/request_rate_manager.h
+++ b/src/request_rate_manager.h
@@ -30,7 +30,7 @@
 #include "load_manager.h"
 #include "request_rate_worker.h"
 
-namespace triton { namespace perfanalyzer {
+namespace triton::perfanalyzer {
 
 #ifndef DOCTEST_CONFIG_DISABLE
 class TestRequestRateManager;
@@ -103,7 +103,7 @@ class RequestRateManager : public LoadManager {
   /// \param target_request_rate The rate at which requests must be issued to
   /// the server.
   /// \param warmup_request_count The number of warmup requests to send.
-  cb::Error PerformWarmup(
+  virtual cb::Error PerformWarmup(
       double target_request_rate, size_t warmup_request_count);
 
   /// Adjusts the rate of issuing requests to be the same as 'request_rate'
@@ -112,9 +112,10 @@ class RequestRateManager : public LoadManager {
   /// \param request_count The number of requests to generate when profiling. If
   /// 0, then there is no limit, and it will generate until told to stop.
   /// \return cb::Error object indicating success or failure.
-  cb::Error ChangeRequestRate(
+  virtual cb::Error ChangeRequestRate(
       const double target_request_rate, const size_t request_count = 0);
 
+
  protected:
   RequestRateManager(
       const bool async, const bool streaming, Distribution request_distribution,
@@ -175,4 +176,4 @@ class RequestRateManager : public LoadManager {
 #endif
 };
 
-}}  // namespace triton::perfanalyzer
+}  // namespace triton::perfanalyzer