From 93dc2e5fa843776e45a0b029b2b6f48ed9e6d87a Mon Sep 17 00:00:00 2001 From: lxw Date: Tue, 23 Jul 2024 19:18:27 +0800 Subject: [PATCH] [GC] Add IO related size policy for Parallel GC Summary: add new parallel gc option UseIOPrioritySizePolicy to early shrink heap when io wait is high. Testing: jtreg Reviewers: maoliang.ml, yude.lyd Issue: https://github.com/dragonwell-project/dragonwell11/issues/846 CR: https://github.com/dragonwell-project/dragonwell11/pull/847 --- .../share/gc/parallel/parallel_globals.hpp | 5 +- .../gc/parallel/psAdaptiveSizePolicy.cpp | 282 +++++++++++++++++- .../gc/parallel/psAdaptiveSizePolicy.hpp | 13 + src/hotspot/share/runtime/arguments.cpp | 15 + 4 files changed, 308 insertions(+), 7 deletions(-) diff --git a/src/hotspot/share/gc/parallel/parallel_globals.hpp b/src/hotspot/share/gc/parallel/parallel_globals.hpp index 5461bf04f32..5fc1467f8e2 100644 --- a/src/hotspot/share/gc/parallel/parallel_globals.hpp +++ b/src/hotspot/share/gc/parallel/parallel_globals.hpp @@ -78,6 +78,9 @@ "Delay in scheduling GC workers (in milliseconds)") \ \ product(bool, PSChunkLargeArrays, true, \ - "Process large arrays in chunks") + "Process large arrays in chunks") \ + \ + product(bool, UseIOPrioritySizePolicy, true, \ + "eagerly decrease heap when io wait is high") \ #endif // SHARE_GC_PARALLEL_PARALLEL_GLOBALS_HPP diff --git a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp index 6377ec229cc..a92db7e0480 100644 --- a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp +++ b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp @@ -37,6 +37,231 @@ #include +class IOPolicy : public CHeapObj { + class IOWaitRecord { + public: + IOWaitRecord(bool *active) : _active(active), _last_cpu_total(0), _last_cpu_iowait(0) {} + void start() { + fill_value(&_last_cpu_total, &_last_cpu_iowait); + } + double stop() { + size_t total, iowait; + fill_value(&total, &iowait); + + size_t total_diff = total - 
_last_cpu_total; + size_t iowait_diff = iowait - _last_cpu_iowait; + if (total_diff == 0) { + log_debug(gc, ergo, heap)("fail to record, cpu total diff is 0"); + return 0; + } else { + return (double)iowait_diff / (double)total_diff; + } + } + private: + // if anything unexpected happened during record, we will deactivate the policy + bool *_active; + size_t _last_cpu_total; + size_t _last_cpu_iowait; + void fill_value(size_t *total, size_t *iowait) { + FILE *file = fopen("/proc/stat", "r"); + if (file == NULL) { + log_warning(gc, ergo, heap)("Deactivate UseIOPrioritySizePolicy due to failed to open cpu stat"); + *_active = false; + return; + } + + char line[256]; + + if (fgets(line, sizeof(line), file)) { + size_t user, nice, system, idle, iowait_time, irq, softirq, steal, guest, guest_nice; + sscanf(line, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", + &user, &nice, &system, &idle, &iowait_time, &irq, &softirq, &steal, &guest, &guest_nice); + + *total = user + nice + system + idle + iowait_time + irq + softirq + steal + guest + guest_nice; + *iowait = iowait_time; + } else { + log_warning(gc, ergo, heap)("Deactivate UseIOPrioritySizePolicy due to failed to parse cpu stat"); + *_active = false; + } + fclose(file); + } + }; + + class UserTimeRecord { + public: + UserTimeRecord(bool *active) : _active(active), _starting_user_time(0), _starting_system_time(0), _starting_real_time(0) {} + void start() { + if (!os::getTimesSecs(&_starting_real_time, &_starting_user_time, &_starting_system_time)) { + log_warning(gc, ergo, heap)("Deactivate UseIOPrioritySizePolicy due to failed to get cpu times"); + *_active = false; + } + } + double stop() { + const static double INVALID = 99999; + double real_time, user_time, system_time; + if (!os::getTimesSecs(&real_time, &user_time, &system_time)) { + log_warning(gc, ergo, heap)("Deactivate UseIOPrioritySizePolicy due to failed to get cpu times"); + *_active = false; + return INVALID; + } + double user_diff = user_time - 
_starting_user_time; + double real_diff = real_time - _starting_real_time; + // too short interval to calculate a meaningful user time percent, thus we + // return a very large number to avoid trigger memory reduction. + if (real_diff < 0.00001) { + log_debug(gc, ergo, heap)("fail to record, real_duration too small: %f", real_diff); + return INVALID; + } + return user_diff / real_diff; + } + private: + // if anything unexpected happened during record, we will deactivate the policy + bool *_active; + double _starting_user_time; + double _starting_system_time; + double _starting_real_time; + }; + + double _mutator_iowait_percent; + double _mutator_user_percent; + elapsedTimer _io_triggerred_major_gc_timer; + + IOWaitRecord _io_wait_record; + UserTimeRecord _user_time_record; + + bool _active; + bool _should_reduce_heap; + +public: + IOPolicy() : + _mutator_iowait_percent(0.0), + _mutator_user_percent(0.0), + _io_triggerred_major_gc_timer(), + _io_wait_record(&_active), + _user_time_record(&_active), + _active(true), + _should_reduce_heap(false) + { + _io_triggerred_major_gc_timer.start(); + start_mutator_record(); + if (FLAG_IS_CMDLINE(NewSize)) { + log_debug(gc, ergo, heap)("NewSize or Xmn is set, which may introduce a large size for min young size"); + } + if (MaxHeapSize == InitialHeapSize) { + log_debug(gc, ergo, heap)("Xmx is equal to Xms, which may introduce a large size for min young size"); + } + log_debug(gc, ergo, heap)("min size: young " SIZE_FORMAT "M, old " SIZE_FORMAT "M. 
"
+ "IOPrioritySizePolicy can't decrease heap below these sizes",
+ ParallelScavengeHeap::young_gen()->min_gen_size()/M,
+ ParallelScavengeHeap::old_gen()->min_gen_size()/M);
+ }
+
+ void start_mutator_record() {
+ if (!_active) {
+ return;
+ }
+ _io_wait_record.start();
+ _user_time_record.start();
+ }
+
+ void stop_mutator_record() {
+ if (!_active) {
+ return;
+ }
+ _mutator_iowait_percent = _io_wait_record.stop();
+ _mutator_user_percent = _user_time_record.stop();
+ }
+
+ void print(double mutator_cost) const {
+ if (!_active) {
+ return;
+ }
+ log_debug(gc, ergo, heap)("mutator cost: %f, iowait : %f, user: %f", mutator_cost, _mutator_iowait_percent, _mutator_user_percent);
+ }
+
+ bool should_full_GC() {
+ if (!_active) {
+ return false;
+ }
+
+ // These thresholds are tuned by Spark on TPC-DS workload.
+ const static double IOTriggerredFullGCUserThreshold = 0.75;
+ const static double IOTriggerredFullGCIOWaitThreshold = 0.4;
+ const static double IOTriggerredFullGCMinInterval = 60; // can be set longer if io heavy workload lasts long.
+ + if (_mutator_user_percent < IOTriggerredFullGCUserThreshold && + _mutator_iowait_percent > IOTriggerredFullGCIOWaitThreshold) { + _io_triggerred_major_gc_timer.stop(); + if (_io_triggerred_major_gc_timer.seconds() > + IOTriggerredFullGCMinInterval) { + _io_triggerred_major_gc_timer.reset(); + _io_triggerred_major_gc_timer.start(); + log_debug(gc, ergo, heap)("decrease old gen by full gc"); + return true; + } else { + log_debug(gc, ergo, heap)( + "decrease old gen FAILED because interval is %f < %f", + _io_triggerred_major_gc_timer.seconds(), IOTriggerredFullGCMinInterval); + _io_triggerred_major_gc_timer.start(); + return false; + } + } + return false; + } + + double calculate_reduced_throughput_goal(double throughput_goal) { + if (!_active) { + return throughput_goal; + } + + const static double UserThreshold = 1.0; + const static double IOWaitThreshold = 0.1; + + if (_mutator_user_percent < UserThreshold && + _mutator_iowait_percent > IOWaitThreshold) { + double reduced_throughput_goal = throughput_goal - (1 - _mutator_user_percent); + _should_reduce_heap = true; + log_debug(gc, ergo, heap)("decrease throughput goal to %.3f", reduced_throughput_goal); + return reduced_throughput_goal; + } else { + _should_reduce_heap = false; + return throughput_goal; + } + } + + size_t calculate_reduced_eden_size(size_t eden_size, float avg_survivor, size_t current_eden_size) const { + if (!_active || !_should_reduce_heap) { + return eden_size; + } + size_t reduced_size; + reduced_size = MIN(eden_size, avg_survivor * SurvivorRatio); + reduced_size = MAX(reduced_size, ParallelScavengeHeap::heap()->young_gen()->max_size() / 10); + log_debug(gc, ergo, heap)( + "decrease eden from " SIZE_FORMAT "M to " SIZE_FORMAT "M , " + "survivor avg: %fM, min threshold: " SIZE_FORMAT "M", + current_eden_size/M, reduced_size/M, avg_survivor/M, + ParallelScavengeHeap::heap()->young_gen()->max_size()/10/M); + return reduced_size; + } + + size_t calculate_reduced_promo_size(size_t promo_size, 
float avg_promo, size_t current_promo_size) const {
+ if (!_active || !_should_reduce_heap) {
+ return promo_size;
+ }
+ const static float PromoRatio = 5;
+ size_t reduced_size;
+ // Clamp from the incoming promo_size (mirrors calculate_reduced_eden_size);
+ // starting from the uninitialized reduced_size itself is undefined behavior.
+ reduced_size = MIN(promo_size, avg_promo * PromoRatio);
+ reduced_size = MAX(reduced_size,
+ ParallelScavengeHeap::heap()->old_gen()->max_gen_size() / 10);
+ log_debug(gc, ergo, heap)(
+ "decrease promotion from " SIZE_FORMAT "M to " SIZE_FORMAT "M , "
+ "promo avg: %fM, min threshold: " SIZE_FORMAT "M",
+ current_promo_size/M, reduced_size/M, avg_promo/M,
+ ParallelScavengeHeap::heap()->old_gen()->max_gen_size()/10/M);
+ return reduced_size;
+ }
+};
+
 PSAdaptiveSizePolicy::PSAdaptiveSizePolicy(size_t init_eden_size,
 size_t init_promo_size,
 size_t init_survivor_size,
@@ -54,6 +279,7 @@ PSAdaptiveSizePolicy::PSAdaptiveSizePolicy(size_t init_eden_size,
 _live_at_last_full_gc(init_promo_size),
 _gc_minor_pause_goal_sec(gc_minor_pause_goal_sec),
 _latest_major_mutator_interval_seconds(0),
+ _dynamic_throughput_goal(_throughput_goal),
 _young_gen_change_for_major_pause_count(0)
 {
 // Sizing policy statistics
@@ -75,6 +301,9 @@ PSAdaptiveSizePolicy::PSAdaptiveSizePolicy(size_t init_eden_size,
 // Start the timers
 _major_timer.start();
+ if (UseIOPrioritySizePolicy) {
+ _io_policy = new IOPolicy();
+ }
 }
 
 size_t PSAdaptiveSizePolicy::calculate_free_based_on_live(size_t live, uintx ratio_as_percentage) {
@@ -109,6 +338,21 @@ size_t PSAdaptiveSizePolicy::calculated_old_free_size_in_bytes() const {
 return free_size;
 }
 
+void PSAdaptiveSizePolicy::minor_collection_begin() {
+ AdaptiveSizePolicy::minor_collection_begin();
+ if (UseIOPrioritySizePolicy) {
+ _io_policy->stop_mutator_record();
+ }
+}
+
+void PSAdaptiveSizePolicy::minor_collection_end(GCCause::Cause gc_cause) {
+ AdaptiveSizePolicy::minor_collection_end(gc_cause);
+ if (UseIOPrioritySizePolicy) {
+ _io_policy->start_mutator_record();
+ _io_policy->print(adjusted_mutator_cost());
+ }
+}
+
 void PSAdaptiveSizePolicy::major_collection_begin() {
// Update the interval time _major_timer.stop(); @@ -129,6 +373,9 @@ void PSAdaptiveSizePolicy::major_collection_end(size_t amount_live, GCCause::Cause gc_cause) { // Update the pause time. _major_timer.stop(); + if (UseIOPrioritySizePolicy) { + _io_policy->start_mutator_record(); + } if (should_update_promo_stats(gc_cause)) { double major_pause_in_seconds = _major_timer.seconds(); @@ -168,6 +415,10 @@ void PSAdaptiveSizePolicy::major_collection_end(size_t amount_live, assert(collection_cost >= 0.0, "Expected to be non-negative"); _major_collection_estimator->update(promo_size_in_mbytes, collection_cost); + + if (UseIOPrioritySizePolicy) { + _io_policy->print(adjusted_mutator_cost()); + } } // Update the amount live at the end of a full GC @@ -183,6 +434,11 @@ void PSAdaptiveSizePolicy::major_collection_end(size_t amount_live, // that expected to be needed by the next collection, do a full // collection now. bool PSAdaptiveSizePolicy::should_full_GC(size_t old_free_in_bytes) { + if (UseIOPrioritySizePolicy) { + if (_io_policy->should_full_GC()) { + return true; + } + } // A similar test is done in the scavenge's should_attempt_scavenge(). If // this is changed, decide if that test should also be changed. @@ -233,6 +489,10 @@ void PSAdaptiveSizePolicy::compute_eden_space_size( size_t max_eden_size, bool is_full_gc) { + if (UseIOPrioritySizePolicy) { + _dynamic_throughput_goal = _io_policy->calculate_reduced_throughput_goal(_throughput_goal); + } + // Update statistics // Time statistics are updated as we go, update footprint stats here _avg_base_footprint->sample(BaseFootPrintEstimate); @@ -313,7 +573,7 @@ void PSAdaptiveSizePolicy::compute_eden_space_size( // Adjust only for the minor pause time goal adjust_eden_for_minor_pause_time(is_full_gc, &desired_eden_size); - } else if(adjusted_mutator_cost() < _throughput_goal) { + } else if(adjusted_mutator_cost() < _dynamic_throughput_goal) { // This branch used to require that (mutator_cost() > 0.0 in 1.4.2. 
// This sometimes resulted in skipping to the minimize footprint // code. Change this to try and reduce GC time if mutator time is @@ -380,7 +640,7 @@ void PSAdaptiveSizePolicy::compute_eden_space_size( } log_debug(gc, ergo)("PSAdaptiveSizePolicy::compute_eden_space_size: costs minor_time: %f major_cost: %f mutator_cost: %f throughput_goal: %f", - minor_gc_cost(), major_gc_cost(), mutator_cost(), _throughput_goal); + minor_gc_cost(), major_gc_cost(), mutator_cost(), _dynamic_throughput_goal); log_trace(gc, ergo)("Minor_pause: %f major_pause: %f minor_interval: %f major_interval: %fpause_goal: %f", _avg_minor_pause->padded_average(), @@ -484,7 +744,7 @@ void PSAdaptiveSizePolicy::compute_old_gen_free_space( set_decide_at_full_gc(decide_at_full_gc_true); adjust_promo_for_pause_time(is_full_gc, &desired_promo_size, &desired_eden_size); } - } else if (adjusted_mutator_cost() < _throughput_goal) { + } else if (adjusted_mutator_cost() < _dynamic_throughput_goal) { // This branch used to require that (mutator_cost() > 0.0 in 1.4.2. // This sometimes resulted in skipping to the minimize footprint // code. Change this to try and reduce GC time if mutator time is @@ -551,7 +811,7 @@ void PSAdaptiveSizePolicy::compute_old_gen_free_space( // Timing stats log_debug(gc, ergo)("PSAdaptiveSizePolicy::compute_old_gen_free_space: costs minor_time: %f major_cost: %f mutator_cost: %f throughput_goal: %f", - minor_gc_cost(), major_gc_cost(), mutator_cost(), _throughput_goal); + minor_gc_cost(), major_gc_cost(), mutator_cost(), _dynamic_throughput_goal); log_trace(gc, ergo)("Minor_pause: %f major_pause: %f minor_interval: %f major_interval: %f pause_goal: %f", _avg_minor_pause->padded_average(), @@ -758,7 +1018,7 @@ void PSAdaptiveSizePolicy::adjust_promo_for_throughput(bool is_full_gc, log_trace(gc, ergo)("Adjusting tenured gen for throughput (avg %f goal %f). 
desired_promo_size " SIZE_FORMAT " promo_delta " SIZE_FORMAT , mutator_cost(), - _throughput_goal, + _dynamic_throughput_goal, *desired_promo_size_ptr, scaled_promo_heap_delta); } } @@ -842,7 +1102,7 @@ void PSAdaptiveSizePolicy::adjust_eden_for_throughput(bool is_full_gc, } log_trace(gc, ergo)("Adjusting eden for throughput (avg %f goal %f). desired_eden_size " SIZE_FORMAT " eden delta " SIZE_FORMAT, - mutator_cost(), _throughput_goal, *desired_eden_size_ptr, scaled_eden_heap_delta); + mutator_cost(), _dynamic_throughput_goal, *desired_eden_size_ptr, scaled_eden_heap_delta); } size_t PSAdaptiveSizePolicy::adjust_promo_for_footprint( @@ -855,6 +1115,11 @@ size_t PSAdaptiveSizePolicy::adjust_promo_for_footprint( size_t reduced_size = desired_promo_size - change; + if (UseIOPrioritySizePolicy) { + reduced_size = _io_policy->calculate_reduced_promo_size(reduced_size, avg_promoted()->average(), desired_promo_size); + change = desired_promo_size - reduced_size; + } + log_trace(gc, ergo)( "AdaptiveSizePolicy::adjust_promo_for_footprint " "adjusting tenured gen for footprint. " @@ -877,6 +1142,11 @@ size_t PSAdaptiveSizePolicy::adjust_eden_for_footprint( size_t reduced_size = desired_eden_size - change; + if (UseIOPrioritySizePolicy) { + reduced_size = _io_policy->calculate_reduced_eden_size(reduced_size, avg_survived()->average(), desired_eden_size); + change = desired_eden_size - reduced_size; + } + log_trace(gc, ergo)( "AdaptiveSizePolicy::adjust_eden_for_footprint " "adjusting eden for footprint. 
" diff --git a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp index 4d7090947d0..fd8aad089c0 100644 --- a/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp +++ b/src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.hpp @@ -54,6 +54,7 @@ // Forward decls class elapsedTimer; +class IOPolicy; class PSAdaptiveSizePolicy : public AdaptiveSizePolicy { friend class PSGCAdaptivePolicyCounters; @@ -107,6 +108,10 @@ class PSAdaptiveSizePolicy : public AdaptiveSizePolicy { // increase/decrease the young generation for major pause time int _change_young_gen_for_maj_pauses; + // it's dynamic only if UseIOPrioritySizePolicy. otherwise it's same as GCTimeRatio. + double _dynamic_throughput_goal; + IOPolicy *_io_policy; + // Changing the generation sizing depends on the data that is // gathered about the effects of changes on the pause times and // throughput. These variable count the number of data points @@ -227,6 +232,14 @@ class PSAdaptiveSizePolicy : public AdaptiveSizePolicy { double gc_minor_pause_goal_sec, uint gc_time_ratio); + void minor_collection_begin(); + void minor_collection_end(GCCause::Cause gc_cause); + + static bool should_update_eden_stats(GCCause::Cause cause) { + return AdaptiveSizePolicy::should_update_eden_stats(cause) || + (UseIOPrioritySizePolicy && GCCause::_gc_locker == cause); + } + // Methods indicating events of interest to the adaptive size policy, // called by GC algorithms. It is the responsibility of users of this // policy to call these methods at the correct times! 
diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 1e8b7319bb3..90d3d34e3e5 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -4115,6 +4115,21 @@ jint Arguments::parse(const JavaVMInitArgs* initial_cmd_args) { DumpAppCDSWithKlassId = true; } + if (UseIOPrioritySizePolicy) { + if (!UseAdaptiveSizePolicy) { + // The following flags MUST be set for UseIOPrioritySizePolicy + if (FLAG_IS_CMDLINE(UseIOPrioritySizePolicy)) { + warning("UseIOPrioritySizePolicy requires UseAdaptiveSizePolicy to be enabled"); + } + UseIOPrioritySizePolicy = false; + } else { + // The following flags are good to be set for better performance, but not mandatory + FLAG_SET_ERGO_IF_DEFAULT(uintx, AdaptiveSizePolicyWeight, 30); + FLAG_SET_ERGO_IF_DEFAULT(bool, UseAdaptiveSizePolicyWithSystemGC, true); + FLAG_SET_ERGO_IF_DEFAULT(uintx, MaxHeapFreeRatio, 100); + } + } + // Set object alignment values. set_object_alignment();