phyto-arm

#!/usr/bin/env python3
'''
This script is used to launch the PhytO-ARM software.

It reads the system configuration from the provided config file and starts the
ROS nodes with the appropriate settings.

The nodes are mostly started by roslaunch, with the notable exception of
`rosbag record`, which we want to keep recording at all costs.
'''
import argparse
import atexit
import json
import os
import shlex
import subprocess
import sys
import urllib.request

import yaml

from scripts.config_validation import validate_config

def prep_environment():
    '''
    Load the Catkin workspace (and optionally the virtual environment) and
    return the resulting environment.

    The workspace's devel/setup.sh is sourced in a shell whose only initial
    variable is _CATKIN_SETUP_DIR, preceded by .venv/bin/activate unless the
    NO_VIRTUALENV environment variable is set. The output of `env` is then
    parsed into a dict.

    Returns:
        A dict mapping variable names to values. Keys and values are all
        `str`, so later assignments such as env['ROS_LOG_DIR'] replace an
        existing entry instead of adding a second one under a bytes key.

    Raises:
        subprocess.CalledProcessError: if sourcing the scripts fails.
    '''
    # Due to a long-standing bug in the setup.sh script, we must provide the
    # devel/ directory in the _CATKIN_SETUP_DIR environment variable.
    #
    # This also needs to be an absolute path so that Python can locate packages.
    #
    # Ref: https://github.com/catkin/catkin_tools/issues/376
    #
    # Make __file__ absolute *before* taking its dirname. When __file__ is a
    # bare relative name (e.g. `python3 phyto-arm` on interpreters that do not
    # absolutize it), dirname() returns '' and shlex.quote('') expands to '',
    # which would make the shell source /.venv/bin/activate.
    parent_dir = os.path.dirname(os.path.abspath(__file__))
    setup_dir = os.path.join(parent_dir, 'devel')
    env = {
        '_CATKIN_SETUP_DIR': setup_dir
    }

    # Build command to load the catkin workspace (and optionally a virtual
    # environment) and dump the environment.
    command = f'. {shlex.quote(setup_dir)}/setup.sh && env'

    if os.getenv('NO_VIRTUALENV') is None:
        command = f'. {shlex.quote(parent_dir)}/.venv/bin/activate && ' + \
                  command

    # Dump the environment and parse the output
    env_out = subprocess.check_output(command, shell=True, env=env)
    for line in env_out.rstrip().split(b'\n'):
        var, sep, value = line.partition(b'=')
        if not sep or not var:
            # Not a NAME=value line (e.g. the tail of a multi-line value)
            continue
        # fsdecode round-trips arbitrary bytes, so nothing is lost when
        # subprocess encodes the environment again
        env[os.fsdecode(var)] = os.fsdecode(value)

    return env


def prep_roslaunch(config, env, package, launchfile, config_file=None):
    '''
    Prepare a command and environment for roslaunch.

    Args:
        config: The loaded configuration dict. Its optional 'launch_args'
            mapping is forwarded to roslaunch as `name:=value` arguments.
        env: The environment dict for the process. It is never modified; a
            copy is returned when a variable has to be added.
        package: Name of the ROS package containing the launch file.
        launchfile: Name of the launch file within the package.
        config_file: Path of the config file passed to the launch file as
            `config_file:=...`. When omitted, the path is taken from the
            module-level `args` parsed in the `__main__` section, which is
            what existing callers rely on.

    Returns:
        A (command, env) tuple, where command is a shell-escaped string.
    '''
    if config_file is None:
        # Backward-compatible fallback: only works when the module is run as
        # a script, since that is where the global `args` is defined.
        config_file = args.config

    # A config with `launch_args:` left empty loads as None
    launch_args = config.get('launch_args') or {}

    # Build the command-line arguments for roslaunch
    rl_args = [
        'roslaunch',
        '--wait',  # because we can start the ROS master ourselves
        '--required',  # stop if any process fails
        '--skip-log-check',
        package, launchfile,
        f'config_file:={os.path.abspath(config_file)}',
    ]
    rl_args.extend(
        f'{launch_arg}:={value}' for launch_arg, value in launch_args.items()
    )

    # Allow the config to set a launch prefix (like gdb) for nodes. We have to
    # use an environment variable for this because roslaunch will error if an
    # argument is unset, while an environment variable is optional.
    launch_prefix = launch_args.get('launch_prefix')
    if launch_prefix is not None:
        env = dict(env)  # copy first
        env['LAUNCH_PREFIX'] = launch_prefix

    # Build the command to invoke as an escaped string
    # For straceing signals: strace -f -o signals.log -tt -e trace=none
    command = ' '.join(shlex.quote(a) for a in rl_args)
    return command, env


def send_alerts(alert_config, deployment, launch_name, timeout=10.0):
    '''
    Send alerts when the program stops.

    Alerts are best-effort: this runs as an atexit handler, so a bad entry or
    an unreachable endpoint is reported on stderr and the remaining alerts are
    still attempted.

    Args:
        alert_config: Iterable of alert dicts. Only entries with
            type == 'slack' and a non-empty 'url' are supported.
        deployment: Deployment name included in the message.
        launch_name: Name of the launch configuration that stopped.
        timeout: Seconds to wait for each HTTP request, so that an
            unresponsive endpoint cannot block shutdown indefinitely.
    '''
    payload = json.dumps({
        'text': f'*PhytO-ARM process stopped*\n - Deployment: _{deployment}_ \n - Process: _{launch_name}_'
    }).encode()

    for alert in alert_config or []:
        # Explicit check rather than `assert`, which is stripped under -O
        if alert.get('type') != 'slack' or not alert.get('url'):
            print(f'skipping unsupported alert of type {alert.get("type")!r}',
                  file=sys.stderr)
            continue

        try:
            # Request() raises ValueError for a malformed URL, so it belongs
            # inside the try block as well
            request = urllib.request.Request(
                alert['url'],
                data=payload,
                headers={'Content-Type': 'application/json'},
            )
            # The context manager closes the response once it is received
            with urllib.request.urlopen(request, timeout=timeout):
                pass
        except (OSError, ValueError) as exc:
            # URLError/HTTPError and socket timeouts are all OSErrors
            print(f'failed to send alert: {exc}', file=sys.stderr)


def attach(args):
    '''
    Replace this process with a shell that re-attaches to the `phyto-arm`
    screen session. The parsed arguments are not used.
    '''
    shell = '/bin/sh'
    os.execv(shell, [shell, '-c', 'screen -r phyto-arm'])


def _start(args):
    '''
    Validate and load the config, then launch and supervise the ROS processes.

    When args.launch_name is "main", roscore and the rosbag launch file are
    started in addition to the requested launch file; for any other launch
    name only that launch file is started. The function then blocks until any
    child process exits and returns, leaving the atexit handlers to terminate
    the remaining children and send alerts.

    Args:
        args: Parsed command-line arguments; reads `config`, `config_schema`,
            `skip_validation` and `launch_name`.
    '''
    # Validate config file
    if not args.skip_validation:
        print(f'Validating config file {args.config} against {args.config_schema}...')
        if validate_config(args.config, args.config_schema):
            print('Config file is valid')
        else:
            sys.exit(1)

    # Load the config file
    with open(args.config, 'rb') as f:
        config = yaml.safe_load(f)

    # Prepare the environment
    env = prep_environment()

    # Define an atexit handler that can be used to terminate subprocesses.
    # Recall that these handlers are invoked in reverse order, so since we start
    # roscore first, it is last to be terminated.
    def cleanup(name, proc, dont_kill=False):
        if proc.returncode is not None:  # already dead
            return

        try:
            print(f'terminating {name} ({proc.pid})')
            proc.terminate()
            proc.wait(5.0)
        except subprocess.TimeoutExpired:
            if dont_kill:
                print(f'failed to terminate {name} ({proc.pid}), '
                        'but refusing to kill')
            else:
                print(f'failed to terminate {name} ({proc.pid}), killing')
                proc.kill()

            # With dont_kill this waits for as long as the process needs
            proc.wait()
            print(f'{name} ({proc.pid}) eventually exited')

    # Set up alerts for when we terminate. Registered first, so it runs after
    # all of the cleanup handlers.
    atexit.register(send_alerts, config.get('alerts', []), config.get('name'), args.launch_name)

    # Allow the config to override where logs are stored
    log_dir = config.get('launch_args', {}).get('log_dir')
    if log_dir is not None:
        env['ROS_LOG_DIR'] = log_dir

    # The following should only launch once, with the main PhytO-ARM processes.
    # Both must be defined even when they are not started, because the
    # bookkeeping loop after os.wait() inspects them.
    roscore = None
    rosbag = None
    if args.launch_name == "main":
        # Before we continue compress any older log files
        #print('Compressing older logs')
        #os.system(f'find {shlex.quote(log_dir)} -iname \*.log -exec gzip {{}} \;')

        # Start the ROS core processes (ROS master, etc.)
        roscore = subprocess.Popen('roscore', env=env)
        atexit.register(cleanup, 'roscore', roscore)

        # Start the rosbag logging process. We run this separately from the main
        # launch file so that if the main nodes crash, we don't terminate the rosbag
        # prematurely.
        command, env = prep_roslaunch(config, env, 'phyto_arm', 'rosbag.launch')
        rosbag = subprocess.Popen(command, shell=True, env=env)
        atexit.register(cleanup, 'rosbag record', rosbag, dont_kill=True)

    # Prepare our roslaunch command
    command, env = prep_roslaunch(config, env, 'phyto_arm', f'{args.launch_name}.launch')
    nodes = subprocess.Popen(command, shell=True, env=env)
    atexit.register(cleanup, 'nodes', nodes)

    # Wait for any child to terminate
    pid, status = os.wait()

    # Since we called wait() behind subprocess's back, we need to inform it that
    # the process terminated. A little hacky.
    for proc in (roscore, rosbag, nodes):
        if proc is None or proc.pid != pid:
            continue
        if os.WIFEXITED(status):
            proc.returncode = os.WEXITSTATUS(status)
        elif os.WIFSIGNALED(status):
            # Same convention as subprocess: negative signal number
            proc.returncode = -os.WTERMSIG(status)
        else:
            proc.returncode = -1

    # The atexit handlers will terminate the other subprocesses


def start(args):
    '''
    Start PhytO-ARM, wrapping the process in a detached `phyto-arm` screen
    session unless the DONT_SCREEN environment variable is set.

    With DONT_SCREEN set, this simply delegates to _start(). Otherwise the
    variable is set and the script re-executes itself, with the same
    command-line arguments, inside a new screen session. Nothing is started if
    a matching screen session is already listed.
    '''
    # Use an environment variable to bypass wrapping the process in screen
    if os.getenv('DONT_SCREEN'):
        return _start(args)

    # The re-executed copy inherits this and therefore takes the branch above
    os.environ['DONT_SCREEN'] = '1'

    # Check if there is a screen session running already; a zero exit status
    # from `screen -list` is taken to mean that one exists
    list_status = subprocess.call(
        ['screen', '-list', 'phyto-arm'],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if list_status == 0:
        print('PhytO-ARM appears to be running already, aborting')
        return

    # Re-run this command in a screen session
    quoted_argv = ' '.join(map(shlex.quote, sys.argv))
    shell = '/bin/sh'
    os.execv(shell, [shell, '-c', 'screen -L -dmS phyto-arm ' + quoted_argv])


def stop(args):
    '''
    Replace this process with a shell that tells the `phyto-arm` screen
    session to quit. The parsed arguments are not used.
    '''
    shell = '/bin/sh'
    os.execv(shell, [shell, '-c', 'screen -S phyto-arm -X quit'])


if __name__ == '__main__':
    # Command-line interface: exactly one sub-command must be given
    parser = argparse.ArgumentParser(description=__doc__)
    subcommands = parser.add_subparsers(dest='command', required=True)

    # attach: takes no arguments
    attach_parser = subcommands.add_parser('attach')

    # start: the launch file name and config path, plus validation options
    start_parser = subcommands.add_parser('start')
    for positional in ('launch_name', 'config'):
        start_parser.add_argument(positional)
    start_parser.add_argument('--config_schema', default='./configs/example.yaml')
    start_parser.add_argument('--skip_validation', action='store_true')

    # stop: takes no arguments
    stop_parser = subcommands.add_parser('stop')

    # NOTE: this must remain a module-level name called `args`, because
    # prep_roslaunch() reads `args.config` from the global scope.
    args = parser.parse_args()

    # Invoke the handler for this subcommand
    handlers = {
        'attach': attach,
        'start': start,
        'stop': stop,
    }
    handler = handlers[args.command]
    handler(args)