From b488fb11100a9082359794b6bb56c6dffa598a1e Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Thu, 17 Jun 2021 11:36:42 +0200 Subject: [PATCH 1/2] Parser friendly, more complete logs Reaper status logs are printed even if we are killing a worker, and they show information for each worker. Information is presented in a key=value fashion which is way easier to parse, and correctly handled by most log analysis tools. Since this is breaking former log parsing, I've taken the liberty to also clarify logs, showing that master is included in the total. Signed-off-by: Ulysse Buonomo --- CHANGELOG.md | 3 +- README.md | 4 +-- lib/puma_worker_killer/puma_memory.rb | 12 +++++-- lib/puma_worker_killer/reaper.rb | 39 +++++++++++++---------- lib/puma_worker_killer/rolling_restart.rb | 2 +- 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb5dc01..8aca7c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## Main +- Parser friendly, more complete logs (#97) + ## 0.3.1 - Relax puma dependency (#94) @@ -43,4 +45,3 @@ ## 0.0.3 - Fix memory metrics in on linux - diff --git a/README.md b/README.md index 3c44312..5ce298e 100644 --- a/README.md +++ b/README.md @@ -129,13 +129,13 @@ PumaWorkerKiller.start By default Puma Worker Killer will emit a log when a worker is being killed ``` -PumaWorkerKiller: Out of memory. 5 workers consuming total: 500 mb out of max: 450 mb. Sending TERM to pid 23 consuming 53 mb. +PumaWorkerKiller: Out of memory. 5 workers and master consuming total: 500 mb out of max: 450 mb. Sending TERM to pid 23 consuming 53 mb. ``` or ``` -PumaWorkerKiller: Rolling Restart. 5 workers consuming total: 650mb mb. Sending TERM to pid 34. +PumaWorkerKiller: Rolling Restart. 5 workers and master consuming total: 650mb mb. Sending TERM to pid 34. ``` However you may want to collect more data, such as sending an event to an error collection service like rollbar or airbrake. The `pre_term` lambda gets called before any worker is killed by PWK for any reason. diff --git a/lib/puma_worker_killer/puma_memory.rb b/lib/puma_worker_killer/puma_memory.rb index a201d15..2ce19b0 100644 --- a/lib/puma_worker_killer/puma_memory.rb +++ b/lib/puma_worker_killer/puma_memory.rb @@ -49,9 +49,9 @@ def largest_worker_memory largest_memory_used end - # Will refresh @workers + # Will refresh @workers and @master_memory def get_total(workers = set_workers) - master_memory = GetProcessMem.new(Process.pid).mb + master_memory = set_master_memory worker_memory = workers.values.inject(:+) || 0 worker_memory + master_memory end @@ -61,6 +61,10 @@ def workers @workers || set_workers end + def master_memory + @master_memory || set_master_memory + end + private def get_master @@ -80,5 +84,9 @@ def set_workers {} end end + + def set_master_memory + @master_memory = GetProcessMem.new(Process.pid).mb + end end end diff --git a/lib/puma_worker_killer/reaper.rb b/lib/puma_worker_killer/reaper.rb index 25d943b..6c2f719 100644 --- a/lib/puma_worker_killer/reaper.rb +++ b/lib/puma_worker_killer/reaper.rb @@ -21,23 +21,30 @@ def reap total = get_total_memory @on_calculation&.call(total) - if total > @max_ram - @cluster.master.log "PumaWorkerKiller: Out of memory. #{@cluster.workers.count} workers consuming total: #{total} mb out of max: #{@max_ram} mb. Sending TERM to pid #{@cluster.largest_worker.pid} consuming #{@cluster.largest_worker_memory} mb." - - # Fetch the largest_worker so that both `@pre_term` and `term_worker` are called with the same worker - # Avoids a race condition where: - # Worker A consume 100 mb memory - # Worker B consume 99 mb memory - # pre_term gets called with Worker A - # A new request comes in, Worker B takes it, and consumes 101 mb memory - # term_largest_worker (previously here) gets called and terms Worker B (thus not passing the about-to-be-terminated worker to `@pre_term`) - largest_worker = @cluster.largest_worker - @pre_term&.call(largest_worker) - @cluster.term_worker(largest_worker) - - elsif @reaper_status_logs - @cluster.master.log "PumaWorkerKiller: Consuming #{total} mb with master and #{@cluster.workers.count} workers." + if @reaper_status_logs + @cluster.master.log 'PumaWorkerKiller: Status log. ' \ + "total=#{total}mb " \ + "master=#{@cluster.master_memory}md " \ + "worker_count=#{@cluster.workers.count} " \ + "#{@cluster.workers.map { |worker, mem| "worker_#{worker.pid}=#{mem}mb" } * " "}" end + + return unless total > @max_ram + + @cluster.master.log "PumaWorkerKiller: Out of memory. #{@cluster.workers.count} " \ + "workers and master consuming total: #{total} mb out of max: #{@max_ram} mb. " \ + "Sending TERM to pid #{@cluster.largest_worker.pid} consuming #{@cluster.largest_worker_memory} mb." + + # Fetch the largest_worker so that both `@pre_term` and `term_worker` are called with the same worker + # Avoids a race condition where: + # Worker A consume 100 mb memory + # Worker B consume 99 mb memory + # pre_term gets called with Worker A + # A new request comes in, Worker B takes it, and consumes 101 mb memory + # term_largest_worker (previously here) gets called and terms Worker B (thus not passing the about-to-be-terminated worker to `@pre_term`) + largest_worker = @cluster.largest_worker + @pre_term&.call(largest_worker) + @cluster.term_worker(largest_worker) end end end diff --git a/lib/puma_worker_killer/rolling_restart.rb b/lib/puma_worker_killer/rolling_restart.rb index 109b71d..c71bb43 100644 --- a/lib/puma_worker_killer/rolling_restart.rb +++ b/lib/puma_worker_killer/rolling_restart.rb @@ -18,7 +18,7 @@ def reap(seconds_between_worker_kill = 60) return false unless @cluster.running? @cluster.workers.each do |worker, _ram| - @cluster.master.log "PumaWorkerKiller: Rolling Restart. #{@cluster.workers.count} workers consuming total: #{total_memory} mb. Sending TERM to pid #{worker.pid}." + @cluster.master.log "PumaWorkerKiller: Rolling Restart. #{@cluster.workers.count} workers and master consuming total: #{total_memory} mb. Sending TERM to pid #{worker.pid}." @rolling_pre_term&.call(worker) worker.term From 9bc6f9c619515c29d25b74144c0eb39eeb25d704 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Fri, 18 Jun 2021 15:12:57 +0200 Subject: [PATCH 2/2] fix rubocop --- lib/puma_worker_killer/reaper.rb | 8 ++++++-- puma_worker_killer.gemspec | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/puma_worker_killer/reaper.rb b/lib/puma_worker_killer/reaper.rb index 6c2f719..19dfbcc 100644 --- a/lib/puma_worker_killer/reaper.rb +++ b/lib/puma_worker_killer/reaper.rb @@ -29,10 +29,14 @@ def reap "#{@cluster.workers.map { |worker, mem| "worker_#{worker.pid}=#{mem}mb" } * " "}" end - return unless total > @max_ram + kill(total) if total > @max_ram + end + + private + def kill(total_mem) @cluster.master.log "PumaWorkerKiller: Out of memory. #{@cluster.workers.count} " \ - "workers and master consuming total: #{total} mb out of max: #{@max_ram} mb. " \ + "workers and master consuming total: #{total_mem} mb out of max: #{@max_ram} mb. " \ "Sending TERM to pid #{@cluster.largest_worker.pid} consuming #{@cluster.largest_worker_memory} mb." # Fetch the largest_worker so that both `@pre_term` and `term_worker` are called with the same worker diff --git a/puma_worker_killer.gemspec b/puma_worker_killer.gemspec index 033329f..c39842c 100644 --- a/puma_worker_killer.gemspec +++ b/puma_worker_killer.gemspec @@ -3,6 +3,7 @@ lib = File.expand_path('lib', __dir__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'puma_worker_killer/version' +require 'english' Gem::Specification.new do |gem| gem.name = 'puma_worker_killer'