Skip to content

Commit

Permalink
Always resume automatic sessions on remote (BugFix) (canonical#859)
Browse files Browse the repository at this point in the history
* add mermaid flowchart of agent boot

* always resume automated sessions via remote

* fix restart logic requiring SA_RESTARTABLE

* update the url to the stable PPA

* improve coverage of remote assistant code

* add more coverage to the remote assistant

* py35 friendly assertion

* add coverage for sa.config property

* outline checking for open port

* add tests for registering agents args

* more agent unit tests

* more agent unit tests

* add coverage for the job type when resuming

* add tests for SessionAssistantAgent

* tweaks after Max's suggestions

* enable mermaid in sphinx

* translate the mermaid doc from md to rst

* remove the md version of the agent bootup diagram

* add paragraph describing agent resume

* smaller diamond

* fix typo and quote code better

* exclude mermaid blocks from spellcheck; sort list

* surrender to pyspelling

* use better words for system hangs

* correct the exclusion of mermaid content in spelling check

* properly handle no launcher in app_blob + UT

* bring back gettext call and mock it in UT

* smaller diamond redux

* move to function-level mock

* remove "padme" from the lxd_profile once again

* remove unnecessary debs from MB profile

* remove bionic from the special conditions

* use simpler comparison
  • Loading branch information
kissiel authored and binli committed Mar 22, 2024
1 parent 0a26367 commit 3eae986
Show file tree
Hide file tree
Showing 15 changed files with 726 additions and 166 deletions.
149 changes: 93 additions & 56 deletions checkbox-ng/checkbox_ng/launcher/agent.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# This file is part of Checkbox.
#
# Copyright 2017-2019 Canonical Ltd.
# Copyright 2017-2023 Canonical Ltd.
# Written by:
# Maciej Kisielewski <[email protected]>
#
# Checkbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3,
Expand All @@ -20,11 +19,15 @@
functionality.
"""
import gettext
import json
import logging
import os
import socket
import sys
from checkbox_ng import app_context
from plainbox.impl.config import Configuration
from plainbox.impl.secure.sudo_broker import is_passwordless_sudo
from plainbox.impl.session.assistant import ResumeCandidate
from plainbox.impl.session.remote_assistant import RemoteSessionAssistant
from plainbox.impl.session.restart import RemoteDebRestartStrategy
from plainbox.impl.session.restart import RemoteSnappyRestartStrategy
Expand All @@ -36,7 +39,6 @@


class SessionAssistantAgent(rpyc.Service):

session_assistant = None
controlling_controller_conn = None
controller_blaster = None
Expand All @@ -58,10 +60,14 @@ def exposed_register_controller_blaster(self, callable):
def on_connect(self, conn):
try:
if SessionAssistantAgent.controller_blaster:
msg = 'Forcefully disconnected by new controller from {}:{}'.format(
conn._config['endpoints'][1][0], conn._config['endpoints'][1][1])
msg = "Forcefully disconnected by new controller from {}:{}".format(
conn._config["endpoints"][1][0],
conn._config["endpoints"][1][1],
)
SessionAssistantAgent.controller_blaster(msg)
old_controller = SessionAssistantAgent.controlling_controller_conn
old_controller = (
SessionAssistantAgent.controlling_controller_conn
)
if old_controller is not None:
old_controller.close()
SessionAssistantAgent.controller_blaster = None
Expand All @@ -82,7 +88,7 @@ def on_disconnect(self, conn):
self.controlling_controller_conn = None


class RemoteAgent():
class RemoteAgent:
"""
Run checkbox instance as a agent
Expand All @@ -91,76 +97,107 @@ class RemoteAgent():
part should be run on system-under-test.
"""

name = 'agent'
name = "agent"

def invoked(self, ctx):
if os.geteuid():
raise SystemExit(_("Checkbox agent must be run by root!"))
if not is_passwordless_sudo():
raise SystemExit(
_("System is not configured to run sudo without a password!"))
_("System is not configured to run sudo without a password!")
)
if ctx.args.resume:
msg = (
"--resume is deprecated and will be removed soon. "
"Automated sessions are now always resumed. "
"Manual sessions can be resumed from the welcome screen."
)
_logger.warning(msg)

agent_port = ctx.args.port

# Check if able to connect to the agent port as indicator of there
# already being a agent running
def agent_port_open():
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(0.5)
result = sock.connect_ex(('127.0.0.1', agent_port))
sock.close()
return result
if agent_port_open() == 0:
raise SystemExit(_("Found port {} is open. Is Checkbox agent"
" already running?").format(agent_port))
exit_if_port_unavailable(agent_port)

# The RemoteSessionAssistant required a callable that was used to
# start the agent as a part of restart strategy. We don't need that
# functionality, so we just pass a dummy callable that will never be
# called. It's left in to avoid breaking the API.
SessionAssistantAgent.session_assistant = RemoteSessionAssistant(
lambda s: [sys.argv[0] + 'agent'])
snap_data = os.getenv('SNAP_DATA')
snap_rev = os.getenv('SNAP_REVISION')
remote_restart_strategy_debug = os.getenv('REMOTE_RESTART_DEBUG')
if (snap_data and snap_rev) or ctx.args.resume:
if remote_restart_strategy_debug:
strategy = RemoteSnappyRestartStrategy(debug=True)
else:
strategy = RemoteSnappyRestartStrategy()
if os.path.exists(strategy.session_resume_filename):
with open(strategy.session_resume_filename, 'rt') as f:
session_id = f.readline()
SessionAssistantAgent.session_assistant.resume_by_id(
session_id)
elif ctx.args.resume:
# XXX: explicitly passing None to not have to bump Remote API
# TODO: remove on the next Remote API bump
SessionAssistantAgent.session_assistant.resume_by_id(None)
else:
_logger.info("RemoteDebRestartStrategy")
if remote_restart_strategy_debug:
strategy = RemoteDebRestartStrategy(debug=True)
else:
strategy = RemoteDebRestartStrategy()
if os.path.exists(strategy.session_resume_filename):
with open(strategy.session_resume_filename, 'rt') as f:
session_id = f.readline()
_logger.info(
"RemoteDebRestartStrategy resume_by_id %r", session_id)
lambda x: None
)

# the agent is meant to be run only as a service,
# and we always resume if the session was automated,
# so we don't need to encode check whether we should resume

sessions = list(ctx.sa.get_resumable_sessions())
if sessions:
# the sessions are ordered by time, so the first one is the most
# recent one
if is_the_session_noninteractive(sessions[0]):
SessionAssistantAgent.session_assistant.resume_by_id(
session_id)
sessions[0].id
)

self._server = ThreadedServer(
SessionAssistantAgent,
port=agent_port,
protocol_config={
"allow_all_attrs": True,
"allow_setattr": True,
"sync_request_timeout": 1,
"propagate_SystemExit_locally": True
"propagate_SystemExit_locally": True,
},
)
SessionAssistantAgent.session_assistant.terminate_cb = (
self._server.close)
self._server.close
)
self._server.start()

def register_arguments(self, parser):
parser.add_argument('--resume', action='store_true', help=_(
"resume last session"))
parser.add_argument('--port', type=int, default=18871, help=_(
"port to listen on"))
parser.add_argument(
"--resume", action="store_true", help=_("resume last session")
)
parser.add_argument(
"--port", type=int, default=18871, help=_("port to listen on")
)


def is_the_session_noninteractive(
resumable_session: "ResumeCandidate",
) -> bool:
"""
Check if given session is non-interactive.
To determine that we need to take the original launcher that had been used
when the session was started, recreate it as a proper Launcher object, and
check if it's in fact non-interactive.
"""
# app blob is a bytes string with a utf-8 encoded json
# let's decode it and parse it as json
app_blob = json.loads(resumable_session.metadata.app_blob.decode("utf-8"))
launcher = Configuration.from_text(
app_blob.get("launcher", ""), "resumed session"
)
return launcher.sections["ui"].get("type") == "silent"


def exit_if_port_unavailable(port: int) -> None:
"""
Check if the port is available and exit if it's not.
This is used by the agent to check if it's already running.
"""
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(0.5)
result = sock.connect_ex(("127.0.0.1", port))
sock.close()
# the result is 0 if the port is open (which means the low level
# connect() call succeeded), and 1 if it's closed (which means the low
# level connect() call failed)
if result == 0:
raise SystemExit(
_(
"Found port {} is open. Is Checkbox agent" " already running?"
).format(port)
)
9 changes: 2 additions & 7 deletions checkbox-ng/checkbox_ng/launcher/checkbox_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def main():
"slave": "run-agent",
"service": "run-agent",
"master": "control",
"remote": "control"
"remote": "control",
}

known_cmds = list(commands.keys())
Expand Down Expand Up @@ -141,12 +141,7 @@ def main():
subcmd = commands[args.subcommand]()
subcmd.register_arguments(subcmd_parser)
sub_args = subcmd_parser.parse_args(sys.argv[subcmd_index + 1 :])
sa = SessionAssistant(
"com.canonical:checkbox-cli",
"0.99",
"0.99",
["restartable"],
)
sa = SessionAssistant()
ctx = Context(sub_args, sa)
try:
socket.getaddrinfo("localhost", 443) # 443 for HTTPS
Expand Down
Loading

0 comments on commit 3eae986

Please sign in to comment.