From 15c4659ddf5a81df820c8bbbd85e0cbfe7c831f3 Mon Sep 17 00:00:00 2001 From: Nicolas Ochem Date: Sat, 25 Jan 2025 21:23:08 -0800 Subject: [PATCH] exit main thread with same exit code as payment_producer (#713) * exit main thread with same exit code as payment_producer https://github.com/tezos-reward-distributor-organization/tezos-reward-distributor/pull/679 introduced support for exit codes, so an alert can be sent in single-shot mode when payouts fail for any reason. However, it was crude, only supporting exit code 1. The producer thread supports many exit codes. In the case at hand, there is a benign issue where tzkt returns "not synced" and therefore payouts fail, but this is likely temporary and will pass on the next try, so there is no need to alert. Currently, however, it's not possible to behave differently based on the exit code because it's always 0 or 1. An ugly solution is to save the exit code of the child thread in a file, then read it in the main thread. That's what I am doing here. I remain convinced that the entire thread architecture needs to go away, and we need to make TRD single-threaded again, but that's for another day. 
Also: * change the exit code of misconfigured provider to GENERAL_ERROR because it's not really a provider error, * change the help to remove old providers that we don't support anymore * remove debug statement --- src/pay/payment_producer.py | 9 ++++++--- src/util/exit_program.py | 1 + src/util/parser.py | 8 ++------ src/util/process_life_cycle.py | 23 ++++++++++++++++++++++- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/pay/payment_producer.py b/src/pay/payment_producer.py index 0e4f3ec8..d60fc76b 100644 --- a/src/pay/payment_producer.py +++ b/src/pay/payment_producer.py @@ -25,6 +25,7 @@ logger = main_logger.getChild("payment_producer") BOOTSTRAP_SLEEP = 4 +EXIT_CODE_FILE = "/tmp/trd_producer_exit_code" class PaymentProducer(threading.Thread, PaymentProducerABC): @@ -117,7 +118,7 @@ def __init__( logger.error( "Only 'ACTUAL' rewards type is supported as of Paris protocol. Please fix your configuration." ) - self.exit(ExitCode.PROVIDER_ERROR) + self.exit(ExitCode.GENERAL_ERROR) self.pay_denunciation_rewards = baking_cfg.get_pay_denunciation_rewards() self.fee_calc = service_fee_calc self.initial_payment_cycle = initial_payment_cycle @@ -161,6 +162,9 @@ def exit(self, exit_code): # This will propagate the exit status to main process on linux. 
abnormal_signal = signal.SIGUSR2 normal_signal = signal.SIGUSR1 + # write the exit code to file, so the main process can exit with the same code + with open(EXIT_CODE_FILE, "w") as f: + f.write(str(exit_code.value)) if self.consumer_failure: os.kill(os.getpid(), abnormal_signal) logger.debug( @@ -226,7 +230,7 @@ def run(self): str(self.provider_factory.provider), str(a) ) ) - self.exit(ExitCode.PROVIDER_ERROR) + self.exit(ExitCode.PROVIDER_BUSY) return # if initial_payment_cycle has the default value of -1 resulting in the last released cycle @@ -292,7 +296,6 @@ def run(self): # payments should not pass beyond last released reward cycle if pymnt_cycle <= current_cycle - 1: if not self.payments_queue.full(): - # If user wants to offset payments within a cycle, check here if level_in_cycle < self.payment_offset: wait_offset_blocks = self.payment_offset - level_in_cycle diff --git a/src/util/exit_program.py b/src/util/exit_program.py index 68883306..92673efd 100644 --- a/src/util/exit_program.py +++ b/src/util/exit_program.py @@ -13,6 +13,7 @@ class ExitCode(Enum): INSUFFICIENT_FUNDS = 6 RETRY_FAILED = 7 PROVIDER_ERROR = 8 + PROVIDER_BUSY = 9 def exit_program(exit_code: ExitCode = ExitCode.SUCCESS, exit_message="Success!"): diff --git a/src/util/parser.py b/src/util/parser.py index a5d8f676..b45d8a99 100644 --- a/src/util/parser.py +++ b/src/util/parser.py @@ -107,12 +107,8 @@ def add_argument_provider(argparser): argparser.add_argument( "-P", "--reward_data_provider", - help="Source of reward data. The default is 'tzkt' (TzKT API). " - "Set to 'rpc' to use your own local node defined with the -A flag, " - "(it must be an ARCHIVE node in this case). " - "Set to 'prpc' to use a public RPC node defined with the -Ap flag. " - "An alternative for providing reward data is 'tzpro', but an API key associated with your account needs to be provided in your configuration!", - choices=["rpc", "prpc", "tzpro", "tzkt"], + help="Source of reward data. 
The only choice is 'tzkt' (TzKT API).", + choices=["tzkt"], default="tzkt", ) diff --git a/src/util/process_life_cycle.py b/src/util/process_life_cycle.py index cb7e7f5c..59ab4183 100644 --- a/src/util/process_life_cycle.py +++ b/src/util/process_life_cycle.py @@ -3,6 +3,7 @@ import queue import signal import platform +import os from signal import SIGABRT, SIGILL, SIGSEGV, SIGTERM if platform.system() != "Windows": @@ -19,7 +20,7 @@ from util.parser import parse_arguments from model.baking_dirs import BakingDirs from pay.payment_consumer import PaymentConsumer -from pay.payment_producer import PaymentProducer +from pay.payment_producer import EXIT_CODE_FILE, PaymentProducer from util.config_life_cycle import ConfigLifeCycle from util.lock_file import LockFile from log_config import main_logger, init, verbose_logger @@ -394,6 +395,26 @@ def producer_exit_handler(self, signum, frame): def shut_down_on_error(self): self.fsm.trigger_event(TrdEvent.SHUT_DOWN_ON_ERROR) + exit_code = ExitCode.GENERAL_ERROR + if os.path.exists(EXIT_CODE_FILE): + try: + with open(EXIT_CODE_FILE, "r") as f: + exit_code_value = int(f.read().strip()) + exit_code = next( + (code for code in ExitCode if code.value == exit_code_value), + ExitCode.GENERAL_ERROR, + ) + os.remove(EXIT_CODE_FILE) + exit_program(exit_code, "Shutdown initiated by producer.") + except Exception: + logger.debug( + "Error reading exit code file. Using default exit code GENERAL_ERROR." + ) + else: + logger.debug( + "Exit code file not found. Using default exit code GENERAL_ERROR." + ) + exit_program(ExitCode.GENERAL_ERROR, "Shutdown due to error!") def shut_down_on_demand(self):