Skip to content

Commit

Permalink
exit main thread with same exit code as payment_producer (#713)
Browse files Browse the repository at this point in the history
* exit main thread with same exit code as payment_producer

#679
introduced support for exit codes, so an alert can be sent in
single-shot mode when payouts fail for any reason.

However, it was crude, only supporting exit code 1.

The producer thread supports many exit codes.

For example, there is a benign case where tzkt returns "not synced"
and payouts therefore fail. This is likely temporary and will pass
on the next try, so there is no need to alert. Currently, however, it is
not possible to behave differently based on the exit code because it is
always 0 or 1.

An ugly solution is to save the exit code of the child thread in a file,
then read it in the main thread. That's what I am doing here. I remain
convinced that the entire thread architecture needs to go away, and we
need to make TRD single threaded again, but that's for another day.

Also:
* change the exit code of a misconfigured provider to GENERAL_ERROR
  because it's not really a provider error
* change the help to remove old providers that we don't support anymore

* remove debug statement
  • Loading branch information
nicolasochem authored Jan 26, 2025
1 parent cdf7d38 commit 15c4659
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 10 deletions.
9 changes: 6 additions & 3 deletions src/pay/payment_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
logger = main_logger.getChild("payment_producer")

BOOTSTRAP_SLEEP = 4
EXIT_CODE_FILE = "/tmp/trd_producer_exit_code"


class PaymentProducer(threading.Thread, PaymentProducerABC):
Expand Down Expand Up @@ -117,7 +118,7 @@ def __init__(
logger.error(
"Only 'ACTUAL' rewards type is supported as of Paris protocol. Please fix your configuration."
)
self.exit(ExitCode.PROVIDER_ERROR)
self.exit(ExitCode.GENERAL_ERROR)
self.pay_denunciation_rewards = baking_cfg.get_pay_denunciation_rewards()
self.fee_calc = service_fee_calc
self.initial_payment_cycle = initial_payment_cycle
Expand Down Expand Up @@ -161,6 +162,9 @@ def exit(self, exit_code):
# This will propagate the exit status to main process on linux.
abnormal_signal = signal.SIGUSR2
normal_signal = signal.SIGUSR1
# write the exit code to file, so the main process can exit with the same code
with open(EXIT_CODE_FILE, "w") as f:
f.write(str(exit_code.value))
if self.consumer_failure:
os.kill(os.getpid(), abnormal_signal)
logger.debug(
Expand Down Expand Up @@ -226,7 +230,7 @@ def run(self):
str(self.provider_factory.provider), str(a)
)
)
self.exit(ExitCode.PROVIDER_ERROR)
self.exit(ExitCode.PROVIDER_BUSY)
return

# if initial_payment_cycle has the default value of -1 resulting in the last released cycle
Expand Down Expand Up @@ -292,7 +296,6 @@ def run(self):
# payments should not pass beyond last released reward cycle
if pymnt_cycle <= current_cycle - 1:
if not self.payments_queue.full():

# If user wants to offset payments within a cycle, check here
if level_in_cycle < self.payment_offset:
wait_offset_blocks = self.payment_offset - level_in_cycle
Expand Down
1 change: 1 addition & 0 deletions src/util/exit_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class ExitCode(Enum):
INSUFFICIENT_FUNDS = 6
RETRY_FAILED = 7
PROVIDER_ERROR = 8
PROVIDER_BUSY = 9


def exit_program(exit_code: ExitCode = ExitCode.SUCCESS, exit_message="Success!"):
Expand Down
8 changes: 2 additions & 6 deletions src/util/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,8 @@ def add_argument_provider(argparser):
argparser.add_argument(
"-P",
"--reward_data_provider",
help="Source of reward data. The default is 'tzkt' (TzKT API). "
"Set to 'rpc' to use your own local node defined with the -A flag, "
"(it must be an ARCHIVE node in this case). "
"Set to 'prpc' to use a public RPC node defined with the -Ap flag. "
"An alternative for providing reward data is 'tzpro', but an API key associated with your account needs to be provided in your configuration!",
choices=["rpc", "prpc", "tzpro", "tzkt"],
help="Source of reward data. The only choice is 'tzkt' (TzKT API).",
choices=["tzkt"],
default="tzkt",
)

Expand Down
23 changes: 22 additions & 1 deletion src/util/process_life_cycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import queue
import signal
import platform
import os
from signal import SIGABRT, SIGILL, SIGSEGV, SIGTERM

if platform.system() != "Windows":
Expand All @@ -19,7 +20,7 @@
from util.parser import parse_arguments
from model.baking_dirs import BakingDirs
from pay.payment_consumer import PaymentConsumer
from pay.payment_producer import PaymentProducer
from pay.payment_producer import EXIT_CODE_FILE, PaymentProducer
from util.config_life_cycle import ConfigLifeCycle
from util.lock_file import LockFile
from log_config import main_logger, init, verbose_logger
Expand Down Expand Up @@ -394,6 +395,26 @@ def producer_exit_handler(self, signum, frame):

def shut_down_on_error(self):
    """Shut down after an error, reusing the producer's exit code if one was saved.

    Triggers the SHUT_DOWN_ON_ERROR event on the state machine, then looks
    for the file at EXIT_CODE_FILE. If the file holds an integer, the
    program exits with the matching ExitCode member (or GENERAL_ERROR when
    the number matches no member). If the file is missing or unreadable,
    the program exits with GENERAL_ERROR.

    The file is removed whenever it exists, including when its content is
    invalid, so a stale or corrupt value cannot be picked up by a later
    run. A failure to remove it never overrides an exit code that was
    read successfully.
    """
    self.fsm.trigger_event(TrdEvent.SHUT_DOWN_ON_ERROR)

    producer_exit_code = None
    try:
        # Open directly instead of checking os.path.exists first, so there is
        # no window between the existence check and the read.
        with open(EXIT_CODE_FILE, "r") as f:
            exit_code_value = int(f.read().strip())
    except FileNotFoundError:
        logger.debug(
            "Exit code file not found. Using default exit code GENERAL_ERROR."
        )
    except (OSError, ValueError):
        # ValueError covers non-integer content (and undecodable bytes).
        logger.debug(
            "Error reading exit code file. Using default exit code GENERAL_ERROR."
        )
    else:
        # A number that matches no ExitCode member maps to GENERAL_ERROR.
        producer_exit_code = next(
            (code for code in ExitCode if code.value == exit_code_value),
            ExitCode.GENERAL_ERROR,
        )

    # Clean up independently of the read outcome; a deletion problem must not
    # change the exit code reported to the caller.
    try:
        os.remove(EXIT_CODE_FILE)
    except FileNotFoundError:
        pass
    except OSError:
        logger.debug("Could not remove exit code file.")

    if producer_exit_code is not None:
        exit_program(producer_exit_code, "Shutdown initiated by producer.")

    # Reached when no usable exit code was found.
    # NOTE(review): presumably exit_program terminates the process, so this
    # is not reached after the call above; confirm against util.exit_program.
    exit_program(ExitCode.GENERAL_ERROR, "Shutdown due to error!")

def shut_down_on_demand(self):
Expand Down

0 comments on commit 15c4659

Please sign in to comment.