Skip to content

Commit

Permalink
[crmsh-4.5] Fix: Raise an exception as a rapid return of ssh-related …
Browse files Browse the repository at this point in the history
…operations to prevent hang (bsc#1228899) (#1508)

## Problem
In some environments with a network firewall dropping ssh packets, any
ssh operation could hang.

## Solution
- Add the 'core.no_ssh' option in crm.conf.
  "no" is the default.
  "yes" means all ssh-related operations are disabled and fail
  immediately instead of attempting a connection.
- Define a 'utils.NoSSHError' exception and an ssh wrapper function.
Raise the exception when `no_ssh` is `yes`.
- Catch `utils.NoSSHError` in `main` so that SSH-related commands will
log a general error message: `ERROR: ssh-related operations are
disabled. crmsh works in local mode.`
- Catch 'utils.NoSSHError' when running certain commands that call ssh,
and provide a more user-friendly message.
- `upgradeutil`, which runs regardless of which command is invoked,
returns immediately without calling ssh when `no_ssh` is `yes`.
- Other commands that are not related to SSH will continue to function
normally.

## SSH-related commands
- crm cluster stop --all
- crm cluster start --all
- crm report
  • Loading branch information
liangxin1300 authored Aug 22, 2024
2 parents f3808ff + e87d325 commit 9ac849e
Show file tree
Hide file tree
Showing 10 changed files with 63 additions and 7 deletions.
1 change: 1 addition & 0 deletions crmsh/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ def get(self, value):
'ignore_missing_metadata': opt_boolean('no'),
'report_tool_options': opt_string(''),
'lock_timeout': opt_string('120'),
'no_ssh': opt_boolean('no'),
'OCF_1_1_SUPPORT': opt_boolean('no'),
'obscure_pattern': opt_string('passw*')
},
Expand Down
2 changes: 2 additions & 0 deletions crmsh/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,4 +535,6 @@
RSC_ROLE_PROMOTED_LEGACY = "Master"
RSC_ROLE_UNPROMOTED_LEGACY = "Slave"
PCMK_VERSION_DEFAULT = "2.0.0"

NO_SSH_ERROR_MSG = "ssh-related operations are disabled. crmsh works in local mode."
# vim:ts=4:sw=4:et:
3 changes: 3 additions & 0 deletions crmsh/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,9 @@ def run():
else:
upgradeutil.upgrade_if_needed()
return main_input_loop(context, user_args)
except utils.NoSSHError as msg:
logger.error('%s', msg)
sys.exit(1)
except KeyboardInterrupt:
if config.core.debug:
raise
Expand Down
9 changes: 7 additions & 2 deletions crmsh/report/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,13 @@ def collect_ratraces():
# need to parse crmsh log file to extract custom trace ra log directory on each node
log_contents = ""
cmd = "grep 'INFO: Trace for .* is written to ' {}*|grep -v 'collect'".format(log.CRMSH_LOG_FILE)
for node in crmutils.list_cluster_nodes():
log_contents += crmutils.get_stdout_or_raise_error(cmd, remote=node, no_raise=True) + "\n"

if utillib.local_mode():
log_contents = crmutils.get_stdout_or_raise_error(cmd, no_raise=True) + "\n"
else:
for node in crmutils.list_cluster_nodes():
log_contents += crmutils.get_stdout_or_raise_error(cmd, remote=node, no_raise=True) + "\n"

trace_dir_str = ' '.join(list(set(re.findall("written to (.*)/.*", log_contents))))
if not trace_dir_str:
return
Expand Down
5 changes: 5 additions & 0 deletions crmsh/report/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def dump_env():
env_dict["EXTRA_LOGS"] = constants.EXTRA_LOGS
env_dict["PCMK_LOG"] = constants.PCMK_LOG
env_dict["VERBOSITY"] = int(config.report.verbosity) or (1 if config.core.debug else 0)
env_dict["NO_SSH"] = constants.NO_SSH

res_str = ""
for k, v in env_dict.items():
Expand Down Expand Up @@ -133,6 +134,7 @@ def load_env(env_str):
constants.SKIP_LVL = utillib.str_to_bool(env_dict["SKIP_LVL"])
constants.EXTRA_LOGS = env_dict["EXTRA_LOGS"]
constants.PCMK_LOG = env_dict["PCMK_LOG"]
constants.NO_SSH = env_dict["NO_SSH"] == "True"
config.report.verbosity = env_dict["VERBOSITY"]


Expand Down Expand Up @@ -258,6 +260,9 @@ def run():
constants.THIS_IS_NODE = 1

if not is_collector():
if not utillib.local_mode() and config.core.no_ssh:
logger.error("ssh-related operations are disabled. crm report works in local mode.")
constants.NO_SSH = True
if constants.THIS_IS_NODE != 1:
logger.warning("this is not a node and you didn't specify a list of nodes using -n")
#
Expand Down
7 changes: 7 additions & 0 deletions crmsh/report/utillib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1604,4 +1604,11 @@ def sub_sensitive_string(data):
patt = '|'.join([re.escape(s) for s in constants.SANITIZE_VALUE_CIB])
result = re.sub('({})({})'.format('|'.join(constants.SANITIZE_KEY_CIB), patt), '\\1******', result)
return result


def local_mode():
    """
    Check whether crm report should only work on the local node

    The result is truthy when constants.NO_SSH is set (the -S option,
    or ssh-related operations being disabled) or when the node list
    is just the local node
    """
    return constants.NO_SSH or constants.NODES == constants.WE
# vim:ts=4:sw=4:et:
20 changes: 15 additions & 5 deletions crmsh/ui_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,15 @@ def do_start(self, context, *args):
service_check_list.append("corosync-qdevice.service")

node_list = parse_option_for_nodes(context, *args)
for node in node_list[:]:
if all([utils.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
logger.info("The cluster stack already started on {}".format(node))
node_list.remove(node)
try:
for node in node_list[:]:
if all([utils.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
logger.info("The cluster stack already started on {}".format(node))
node_list.remove(node)
except utils.NoSSHError as msg:
logger.error('%s', msg)
logger.info("Please try 'crm cluster start' on each node")
return
if not node_list:
return

Expand Down Expand Up @@ -249,7 +254,12 @@ def do_stop(self, context, *args):
Stops the cluster stack on all nodes or specific node(s)
'''
node_list = parse_option_for_nodes(context, *args)
node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
try:
node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
except utils.NoSSHError as msg:
logger.error('%s', msg)
logger.info("Please try 'crm cluster stop' on each node")
return
if not node_list:
return
logger.debug(f"stop node list: {node_list}")
Expand Down
4 changes: 4 additions & 0 deletions crmsh/upgradeutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import crmsh.healthcheck
import crmsh.parallax
import crmsh.utils
import crmsh.config
import crmsh.constants
from crmsh.prun import prun


Expand Down Expand Up @@ -151,6 +153,8 @@ def ask(msg: str):


def upgrade_if_needed():
if crmsh.config.core.no_ssh:
return
if os.geteuid() != 0:
return
if not crmsh.utils.can_ask(background_wait=False):
Expand Down
18 changes: 18 additions & 0 deletions crmsh/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def user_of(self, host):

def user_pair_for_ssh(self, host: str) -> typing.Tuple[str, str]:
"""Return (local_user, remote_user) pair for ssh connection"""
if config.core.no_ssh:
raise NoSSHError(constants.NO_SSH_ERROR_MSG)

local_user = None
remote_user = None
try:
Expand Down Expand Up @@ -3606,4 +3609,19 @@ def parse_user_at_host(s: str):
else:
return s[:i], s[i+1:]


class NoSSHError(Exception):
    """
    Raised instead of running an ssh-related operation
    when ssh between cluster nodes is disabled
    """


def ssh_command():
    """
    Wrapper function for ssh command

    When ssh between cluster nodes is blocked, core.no_ssh
    should be set to 'yes', then this function will raise NoSSHError
    rather than hand out a command that could hang

    Return the string "ssh" when ssh is allowed
    Raise NoSSHError when config.core.no_ssh is enabled
    """
    if config.core.no_ssh:
        raise NoSSHError(constants.NO_SSH_ERROR_MSG)
    return "ssh"
# vim:ts=4:sw=4:et:
1 change: 1 addition & 0 deletions etc/crm.conf.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
; ignore_missing_metadata = no
; report_tool_options =
; lock_timeout = 120
; no_ssh = no

; set OCF_1_1_SUPPORT to yes is to fully turn on OCF 1.1 feature once the corresponding CIB detected.
; OCF_1_1_SUPPORT = yes
Expand Down

0 comments on commit 9ac849e

Please sign in to comment.