diff --git a/conda/meta.yaml b/conda/meta.yaml index fb500a1c..9a51d328 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -12,6 +12,9 @@ build: - payu-collate = payu.subcommands.collate_cmd:runscript - payu-profile = payu.subcommands.profile_cmd:runscript - payu-sync = payu.subcommands.sync_cmd:runscript + - payu-branch = payu.subcommands.branch_cmd:runscript + - payu-clone = payu.subcommands.clone_cmd:runscript + - payu-checkout = payu.subcommands.checkout_cmd:runscript source: git_url: ../ @@ -34,6 +37,8 @@ requirements: # extra for the pypi package - pyOpenSSL >=0.14 - cryptography>=1.3.4 + - GitPython >= 3.1.40 + - ruamel.yaml >= 0.18.5 test: imports: diff --git a/docs/source/config.rst b/docs/source/config.rst index a701fab5..9d8fc95a 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -229,10 +229,10 @@ configuration. The control path for the experiment. The default setting is the path of the current working directory. -``experiment`` (*Default: current directory*) - The experiment name used for archival. The default setting uses the - ``control`` directory name. - +``experiment`` + The experiment name used for archival. This will override the experiment + name generated using metadata and existing archives + (see :ref:`usage-metadata`). Manifests --------- @@ -465,6 +465,28 @@ Postprocessing Create or update a bare git repository clone of the run history, called ``git-runlog``, in the remote archive directory. +Experiment Tracking +------------------- + +``runlog`` + Automatically commits changes to configuration files and manifests in the + *control directory* when the model runs. This creates a git runlog of the + history of the experiment. + + ``enable`` (*Default:* ``True``) + Flag to enable/disable runlog. + +``metadata`` + Generates and updates metadata files and unique experiment IDs (UUIDs). For more details, see + :ref:`usage-metadata`. 
+ + ``enable`` (*Default:* ``True``) + Flag to enable/disable creating/updating metadata files and UUIDs. + If set to False, the UUID is left out of the experiment name used + for archival. + + ``model`` (*Default: The configured model value*) + Model name used when generating metadata for new experiments. Miscellaneous ============= diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 773bbf55..881ab4bc 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -21,6 +21,16 @@ simultaneously that can share common executables and input data. It also allows the flexibility to have the relatively small control directories in a location that is continuously backed up. +Using a git repository for the experiment +----------------------------------------- + +It is recommended to use the git_ version control system for the payu +*control directory*. This allows the experiment to be easily copied via +cloning. There is inbuilt support in payu for an experiment runlog which +uses git to track changes to configuration files between experiment runs. There are payu commands +for creating and moving between git branches so multiple related experiments +can be run from the same control directory. + Setting up the laboratory ========================= @@ -85,22 +95,44 @@ Populate laboratory directories You will want a unique name for each input directory. -Clone experiment ------------------ - -The payu control directory is maintained under version control using -git_ so existing experiments can be cloned. This is the best way to copy -an experiment as it guarantees that only the required files are copied -to a new control directory, and maintains a link to the original -experiment through the shared git history. +Clone experiment +---------------- + +Cloning is the best way to copy an experiment as it guarantees that only the +required files are copied to a new control directory, and maintains a link +to the original experiment through the shared git history. 
To clone the +repository, you can use ``payu clone``. This is a wrapper around ``git clone`` +which additionally creates or updates the metadata file which gets copied to +the experiment archive directory (see :ref:`usage-metadata`). For example:: mkdir -p ${HOME}/${MODEL} cd ${HOME}/${MODEL} - git clone https://github.com/payu-org/mom-example.git my_expt + payu clone ${REPOSITORY} my_expt cd my_expt +Where ``${REPOSITORY}`` is the git URL or path of the repository to clone from, +for example, https://github.com/payu-org/mom-example.git. + +To clone and checkout an existing git branch, use the ``--branch`` flag and +specify the branch name:: + + payu clone --branch ${EXISTING_BRANCH} ${REPOSITORY} my_expt + +To create and checkout a new git branch use ``--new-branch`` and specify a +new branch name:: + + payu clone --new-branch ${NEW_BRANCH} ${REPOSITORY} my_expt + +To see more configuration options for ``payu clone``, +run:: + + payu clone --help + +As an alternative to creating and checking out branches with ``payu clone``, +``payu checkout`` can be used instead (see :ref:`usage-metadata`). + Create experiment ----------------- @@ -305,3 +337,107 @@ at a later date. To sync all restarts including the latest restarts, use the ``--sync-restarts`` flag:: payu sync --sync-restarts + +.. _usage-metadata: + +Metadata and Related Experiments +================================ + +Metadata files +-------------- + +Each experiment has a metadata file, called ``metadata.yaml`` in the *control +directory*. This contains high-level metadata about the experiment and uses +the ACCESS-NRI experiment schema_. An important field is the ``experiment_uuid`` +which uniquely identifies the experiment. 
Payu generates a new UUID when: + +* Using payu to clone a pre-existing git_ repository of the *control directory* + +* Using payu to create and checkout a new git branch in the *control directory* + +* Or, when setting up an experiment run if there is not a pre-existing metadata + file, UUID, or experiment ``archive`` directory. + +For new experiments, payu may generate some additional metadata fields. This +includes an experiment name, creation date, contact, and email if defined in +the git configuration. This also includes parent experiment UUID if starting +from restarts and the experiment UUID is defined in metadata of the parent directory +containing the restart. + +Once a metadata file is created or updated, it is copied to the directory +that stores the archived experiment outputs. + +.. _schema: https://github.com/ACCESS-NRI/schema/blob/main/experiment_asset.json + +Experiment names +---------------- + +An experiment name is used to identify the experiment inside the ``work`` and +``archive`` sub-directories inside the *laboratory*. + +The experiment name historically would default to the name of the *control +directory*. This is still supported for experiments with pre-existing +archived outputs. To support git branches and ensure uniqueness in shared +archives, the new default behaviour is to add the branch name and a short +version of the experiment UUID to the name of the *control directory* when +creating experiment names. + +For example, given a control directory named +``my_expt`` and a UUID of ``416af8c6-d299-4ee6-9d77-4aefa8a9ebcb``, +the experiment name would be: + +* ``my_expt-perturb-416af8c6`` - if running an experiment on a branch named + ``perturb``. + +* ``my_expt-416af8c6`` - if the control directory was not a git repository or + experiment was run from the ``main`` or ``master`` git branch. 
+ +To preserve backwards compatibility, if there's a pre-existing archive under +the *control directory* name, this will remain the experiment name (e.g. +``my_expt`` in the above example). Similarly, if the ``experiment`` value is +configured (see :ref:`config`), this will be used for the experiment name. + +Switching between related experiments +------------------------------------- + +To be able to run related experiments from the same control directory +using git branches, you can use ``payu checkout`` which is a wrapper around +``git checkout``. Creating new branches will generate a new UUID, update metadata +files, and create a branch-UUID-aware experiment name in ``archive``. +Switching branches will change ``work`` and ``archive`` symlinks in the control +directory to point to directories in *laboratory* if they exist. + +To create a git branch for a new experiment, use the ``-b`` flag. +For example, to create and checkout a new branch called ``perturb1``, run:: + + payu checkout -b perturb1 + +To create a new experiment from an existing branch, specify the branch name +or a commit hash after the new branch name. For example, +the following creates a new experiment branch called ``perturb2`` +that starts from ``perturb1``:: + + payu checkout -b perturb2 perturb1 + +To specify a restart path to start from, use the ``--restart``/ ``-r`` flag, +for example:: + + payu checkout -b perturb --restart path/to/restart + +Note: This can also be achieved by configuring ``restart`` (see :ref:`config`). + +To checkout and switch to an existing branch and experiment, omit the ``-b`` flag. 
+For example, the following checks out the ``perturb1`` branch:: + + payu checkout perturb1 + +To see more ``payu checkout`` options, run:: + + payu checkout --help + +For more information on git branches that exist in the control directory +repository, run:: + + payu branch # Display local branches UUIDs + payu branch --verbose # Display local branches metadata + payu branch --remote # Display remote branches UUIDs diff --git a/payu/branch.py b/payu/branch.py new file mode 100644 index 00000000..adaed3a0 --- /dev/null +++ b/payu/branch.py @@ -0,0 +1,371 @@ +"""Experiment branch support for payu's branch, clone and checkout commands + +This may generate new experiment ID, updates, sets any +specified configuration in config.yaml and updates work/archive symlinks + +:copyright: Copyright 2011 Marshall Ward, see AUTHORS for details. +:license: Apache License, Version 2.0, see LICENSE for details. +""" + +import os +import warnings +from pathlib import Path +from typing import Optional + +from ruamel.yaml import YAML, CommentedMap +import git + +from payu.fsops import read_config, DEFAULT_CONFIG_FNAME, list_archive_dirs +from payu.laboratory import Laboratory +from payu.metadata import Metadata, UUID_FIELD, METADATA_FILENAME +from payu.git_utils import GitRepository, git_clone + + +NO_CONFIG_FOUND_MESSAGE = """No configuration file found on this branch. +Skipping adding new metadata file and creating archive/work symlinks. + +To find a branch that has a config file, you can: + - Display local branches by running: + payu branch + - Or display remote branches by running: + payu branch --remote + +To checkout an existing branch, run: + payu checkout BRANCH_NAME +Where BRANCH_NAME is the name of the branch""" + + +def check_restart(restart_path: Optional[Path], + archive_path: Path) -> Optional[Path]: + """Checks for valid prior restart path. 
def check_config_path(config_path: Optional[Path] = None) -> Optional[Path]:
    """Return the path of an existing configuration file.

    Parameters
    ----------
    config_path: Optional[Path]
        Path to the configuration file. When None, the default file name
        (DEFAULT_CONFIG_FNAME) is resolved against the current working
        directory.

    Returns
    -------
    Path
        The validated configuration path. The default path is returned as
        an absolute path; an explicitly given path is returned unchanged.

    Raises
    ------
    FileNotFoundError
        If the path does not exist or is not a regular file (for example a
        directory). NO_CONFIG_FOUND_MESSAGE is printed before raising.
    """
    if config_path is None:
        # Path.resolve() returns a new object rather than modifying the
        # path in place, so the result has to be kept
        config_path = Path(DEFAULT_CONFIG_FNAME).resolve()

    # is_file() must be *called*: the bare method object is always truthy.
    # It is False for missing paths as well as for directories.
    if not config_path.is_file():
        print(NO_CONFIG_FOUND_MESSAGE)
        raise FileNotFoundError(f"Configuration file {config_path} not found")

    return config_path
keep_uuid: bool = False, + start_point: Optional[str] = None, + restart_path: Optional[Path] = None, + config_path: Optional[Path] = None, + control_path: Optional[Path] = None, + model_type: Optional[str] = None, + lab_path: Optional[Path] = None, + parent_experiment: Optional[str] = None) -> None: + """Checkout branch, setup metadata and add symlinks + + Parameters + ---------- + branch_name : str + Name of branch to checkout/create + is_new_branch: bool, default False + Create new branch and mark as new experiment + is_new_experiment: bool, default False + Create new uuid for this experiment + keep_uuid: bool, default False + Keep UUID unchanged, if it exists - this overrides is_new_experiment + if there is a pre-existing UUID + start_point: Optional[str] + Branch name or commit hash to start new branch from + restart_path: Optional[Path] + Absolute restart path to start experiment from + config_path: Optional[Path] + Path to configuration file - config.yaml + control_path: Optional[Path] + Path to control directory - defaults to current working directory + model_type: Optional[str] + Type of model - used for creating a Laboratory + lab_path: Optional[Path] + Path to laboratory directory + parent_experiment: Optional[str] + Parent experiment UUID to add to generated metadata + """ + if control_path is None: + control_path = get_control_path(config_path) + + # Checkout branch + repo = GitRepository(control_path) + repo.checkout_branch(branch_name, is_new_branch, start_point) + + # Check config file exists on checked out branch + config_path = check_config_path(config_path) + + # Initialise Lab and Metadata + lab = Laboratory(model_type, config_path, lab_path) + metadata = Metadata(Path(lab.archive_path), + branch=branch_name, + config_path=config_path) + + # Setup Metadata + is_new_experiment = is_new_experiment or is_new_branch + metadata.setup(keep_uuid=keep_uuid, + is_new_experiment=is_new_experiment) + + # Gets valid prior restart path + prior_restart_path = 
def switch_symlink(lab_dir_path: Path, control_path: Path,
                   experiment_name: str, sym_dir: str) -> None:
    """Re-point a control-directory symlink at an experiment directory.

    Parameters
    ----------
    lab_dir_path: Path
        Laboratory sub-directory (e.g. archive or work) that holds the
        per-experiment directories
    control_path: Path
        Control directory in which the symlink lives
    experiment_name: str
        Name of the experiment directory inside lab_dir_path
    sym_dir: str
        Name of the symlink in the control directory

    Any existing symlink called sym_dir is removed. A new symlink is only
    created when the experiment directory exists in the laboratory. If a
    real file or directory (not a symlink) already occupies the symlink
    name, it is left untouched and no symlink is created.
    """
    dir_path = lab_dir_path / experiment_name
    sym_path = control_path / sym_dir

    if sym_path.is_symlink():
        # is_symlink() is also True for dangling links, which exists()
        # reports as missing. Those have to be removed as well, otherwise
        # symlink_to() below fails with FileExistsError.
        previous_path = sym_path.resolve()
        sym_path.unlink()
        print(f"Removed {sym_dir} symlink to {previous_path}")
    elif sym_path.exists():
        # A real file or directory occupies the link name - never delete it
        print(f"Skipping {sym_dir} symlink: {sym_path} exists and is "
              "not a symlink")
        return

    # Create symlink, if experiment directory exists in laboratory
    if dir_path.exists():
        sym_path.symlink_to(dir_path)
        print(f"Added {sym_dir} symlink to {dir_path}")
+ + Parameters: + repository: str + Git URL or path to Git repository to clone + directory: Path + The control directory where the repository will be cloned + branch: Optional[str] + Name of branch to clone and checkout + new_branch_name: Optional[str] + Name of new branch to create and checkout. + If branch is also defined, the new branch will start from the + latest commit of the branch. + keep_uuid: bool, default False + Keep UUID unchanged, if it exists + config_path: Optional[Path] + Path to configuration file - config.yaml + control_path: Optional[Path] + Path to control directory - defaults to current working directory + model_type: Optional[str] + Type of model - used for creating a Laboratory + lab_path: Optional[Path] + Path to laboratory directory + restart_path: Optional[Path] + Absolute restart path to start experiment from + parent_experiment: Optional[str] + Parent experiment UUID to add to generated metadata + + Returns: None + """ + # Resolve directory to an absolute path + control_path = directory.resolve() + + # git clone the repository + repo = git_clone(repository, control_path, branch) + + owd = os.getcwd() + try: + # cd into cloned directory + os.chdir(control_path) + + # Use checkout wrapper + if new_branch_name is not None: + # Create and checkout new branch + checkout_branch(is_new_branch=True, + keep_uuid=keep_uuid, + branch_name=new_branch_name, + restart_path=restart_path, + config_path=config_path, + control_path=control_path, + model_type=model_type, + lab_path=lab_path, + parent_experiment=parent_experiment) + else: + # Checkout branch + if branch is None: + branch = repo.get_branch_name() + + checkout_branch(branch_name=branch, + config_path=config_path, + keep_uuid=keep_uuid, + restart_path=restart_path, + control_path=control_path, + model_type=model_type, + lab_path=lab_path, + is_new_experiment=True, + parent_experiment=parent_experiment) + finally: + # Change back to original working directory + os.chdir(owd) + + print(f"To 
def contains_config(branch: git.Head) -> bool:
    """Report whether the branch's tip commit tracks a configuration file.

    Only the blobs at the top level of the commit tree are inspected; a
    blob counts when its name equals DEFAULT_CONFIG_FNAME.
    """
    top_level_blobs = branch.commit.tree.blobs
    return any(entry.name == DEFAULT_CONFIG_FNAME
               for entry in top_level_blobs)
+ + Parameters: + verbose: bool, default False + Display entire metadata files + remote: bool, default False + Display remote Git branches + + Returns: None""" + control_path = get_control_path(config_path) + git_repo = GitRepository(control_path) + + current_branch = git_repo.repo.active_branch + print(f"* Current Branch: {current_branch.name}") + print_branch_metadata(current_branch, verbose) + + if remote: + branches = git_repo.remote_branches_dict() + label = "Remote Branch" + else: + branches = git_repo.local_branches_dict() + label = "Branch" + + for branch_name, branch in branches.items(): + if branch != current_branch: + print(f"{label}: {branch_name}") + print_branch_metadata(branch, verbose) diff --git a/payu/cli.py b/payu/cli.py index 433dc216..1a6b5bab 100644 --- a/payu/cli.py +++ b/payu/cli.py @@ -30,7 +30,6 @@ def parse(): """Parse the command line inputs and execute the subcommand.""" - parser = generate_parser() # Display help if no arguments are provided diff --git a/payu/experiment.py b/payu/experiment.py index f0d85218..40572fbb 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -19,6 +19,7 @@ import shutil import subprocess as sp import sysconfig +from pathlib import Path # Extensions import yaml @@ -26,6 +27,7 @@ # Local from payu import envmod from payu.fsops import mkdir_p, make_symlink, read_config, movetree +from payu.fsops import list_archive_dirs from payu.schedulers.pbs import get_job_info, pbs_env_init, get_job_id from payu.models import index as model_index import payu.profilers @@ -33,6 +35,7 @@ from payu.manifest import Manifest from payu.calendar import parse_date_offset from payu.sync import SyncToRemoteArchive +from payu.metadata import Metadata # Environment module support on vayu # TODO: To be removed @@ -55,6 +58,10 @@ def __init__(self, lab, reproduce=False, force=False): self.start_time = datetime.datetime.now() + # Initialise experiment metadata - uuid and experiment name + self.metadata = 
Metadata(Path(lab.archive_path)) + self.metadata.setup() + # TODO: replace with dict, check versions via key-value pairs self.modules = set() @@ -92,6 +99,9 @@ def __init__(self, lab, reproduce=False, force=False): self.set_output_paths() + # Create metadata file and move to archive + self.metadata.write_metadata(restart_path=self.prior_restart_path) + if not reproduce: # check environment for reproduce flag under PBS reproduce = os.environ.get('PAYU_REPRODUCE', False) @@ -189,7 +199,8 @@ def max_output_index(self, output_type="output"): """Given a output directory type (output or restart), return the maximum index of output directories found""" try: - output_dirs = self.list_output_dirs(output_type) + output_dirs = list_archive_dirs(archive_path=self.archive_path, + dir_type=output_type) except EnvironmentError as exc: if exc.errno == errno.ENOENT: output_dirs = None @@ -199,17 +210,6 @@ def max_output_index(self, output_type="output"): if output_dirs and len(output_dirs): return int(output_dirs[-1].lstrip(output_type)) - def list_output_dirs(self, output_type="output", full_path=False): - """Return a sorted list of restart or output directories in archive""" - naming_pattern = re.compile(fr"^{output_type}[0-9][0-9][0-9]+$") - dirs = [d for d in os.listdir(self.archive_path) - if naming_pattern.match(d)] - dirs.sort(key=lambda d: int(d.lstrip(output_type))) - - if full_path: - dirs = [os.path.join(self.archive_path, d) for d in dirs] - return dirs - def set_stacksize(self, stacksize): if stacksize == 'unlimited': @@ -289,8 +289,7 @@ def set_expt_pathnames(self): self.control_path = self.config.get('control_path') # Experiment name - self.name = self.config.get('experiment', - os.path.basename(self.control_path)) + self.name = self.metadata.experiment_name # Experiment subdirectories self.archive_path = os.path.join(self.lab.archive_path, self.name) @@ -454,7 +453,6 @@ def setup(self, force_archive=False): self.get_restarts_to_prune() def run(self, *user_flags): - 
def list_archive_dirs(archive_path: Union[Path, str],
                      dir_type: str = "output") -> List[str]:
    """Return a sorted list of restart or output directories in archive.

    Parameters
    ----------
    archive_path: Union[Path, str]
        Experiment archive directory to scan
    dir_type: str, default "output"
        Directory name prefix, e.g. "output" or "restart"

    Returns
    -------
    List[str]
        Names (not full paths) of the entries called ``<dir_type>NNN``
        (three or more digits) that are directories, or symlinks that
        resolve to directories, sorted by their numeric index.

    Raises
    ------
    FileNotFoundError
        If archive_path does not exist (raised when iterating over it).
    """
    # Escape the prefix so any regex metacharacters in it match literally
    naming_pattern = re.compile(
        fr"^{re.escape(dir_type)}[0-9][0-9][0-9]+$"
    )
    prefix_length = len(dir_type)

    dirs = [path.name for path in Path(archive_path).iterdir()
            if path.resolve().is_dir() and naming_pattern.match(path.name)]

    # Slice the prefix off rather than using str.lstrip(): lstrip removes
    # any leading characters found in its argument, so a prefix containing
    # digits would also eat into the numeric index
    dirs.sort(key=lambda name: int(name[prefix_length:]))
    return dirs
def get_git_repository(repo_path: Union[Path, str],
                       initialise: bool = False,
                       catch_error: bool = False) -> Optional[git.Repo]:
    """Open the git repository located at repo_path.

    When the path is not a valid git repository:
      - if initialise is true, a new repository is initialised there and
        returned;
      - otherwise a PayuGitWarning is issued, and then either None is
        returned (catch_error is true) or the original
        InvalidGitRepositoryError is re-raised.
    """
    try:
        return git.Repo(repo_path)
    except git.exc.InvalidGitRepositoryError:
        if not initialise:
            warnings.warn(
                f"Path is not a valid git repository: {repo_path}",
                PayuGitWarning
            )
            if catch_error:
                return None
            # Propagate the InvalidGitRepositoryError to the caller
            raise

        new_repo = git.Repo.init(repo_path)
        print(f"Initialised new git repository at: {repo_path}")
        return new_repo
Used for retrieving + name and email saved in git""" + if self.repo is None: + return + + try: + config_reader = self.repo.config_reader() + return config_reader.get_value('user', config_key) + except (configparser.NoSectionError, configparser.NoOptionError): + # No git config set for user.$config_key + return + + def commit(self, + commit_message: str, + paths_to_commit: List[Union[Path, str]]) -> None: + """Add a git commit of changes to paths""" + if self.repo is None: + return + + # Un-stage any pre-existing changes + self.repo.index.reset() + + # Check if paths to commit have changed, or it is an untracked file + changes = False + untracked_files = [Path(self.repo_path) / path + for path in self.repo.untracked_files] + for path in paths_to_commit: + if self.repo.git.diff(None, path) or path in untracked_files: + self.repo.index.add([path]) + changes = True + + # Run commit if there's changes + if changes: + self.repo.index.commit(commit_message) + print(commit_message) + + def local_branches_dict(self) -> Dict[str, git.Head]: + """Return a dictionary mapping local branch names to git.Head + objects""" + branch_names_dict = {} + for head in self.repo.heads: + branch_names_dict[head.name] = head + return branch_names_dict + + def remote_branches_dict(self) -> Dict[str, git.Head]: + """Return a dictionary mapping remote branch names to git.Head + objects""" + branch_names_dict = {} + for remote in self.repo.remotes: + remote.fetch() + for ref in remote.refs: + branch_names_dict[ref.remote_head] = ref + return branch_names_dict + + def checkout_branch(self, + branch_name: str, + new_branch: bool = False, + start_point: Optional[str] = None) -> None: + """Checkout branch and create branch if specified""" + # First check for staged changes + if self.repo.is_dirty(index=True, working_tree=False): + raise PayuBranchError( + "There are staged git changes. 
Please stash or commit them " + "before running the checkout command again.\n" + "To see what files are staged, run: git status" + ) + + # Existing branches + local_branches = self.local_branches_dict().keys() + remote_branches = self.remote_branches_dict() + all_branches = local_branches | set(remote_branches.keys()) + + # Create new branch, if specified + if new_branch: + if branch_name in all_branches: + raise PayuBranchError( + f"A branch named {branch_name} already exists. " + "To checkout this branch, remove the new branch flag '-b' " + "from the checkout command." + ) + + if start_point is not None: + if (start_point not in local_branches and + start_point in remote_branches): + # Use hash for remote start point + start_point = remote_branches[start_point].commit + branch = self.repo.create_head(branch_name, commit=start_point) + else: + branch = self.repo.create_head(branch_name) + branch.checkout() + + print(f"Created and checked out new branch: {branch_name}") + return + + # Checkout branch + if branch_name not in all_branches: + raise PayuBranchError( + f"There is no existing branch called {branch_name}. " + "To create this branch, add the new branch flag '-b' " + "to the checkout command." 
def git_clone(repository: str,
              directory: Union[str, Path],
              branch: Optional[str] = None) -> GitRepository:
    """Clone repository into directory and wrap the result.

    The branch option is only passed on to the clone when a branch name
    was given. Returns a GitRepository built around the cloned repository.
    """
    # Only forward the branch option when one was requested
    clone_options = {} if branch is None else {"branch": branch}
    cloned_repo = git.Repo.clone_from(repository,
                                      to_path=directory,
                                      **clone_options)

    print(f"Cloned repository from {repository} to directory: {directory}")

    return GitRepository(repo_path=directory, repo=cloned_repo)
def read_file(self) -> CommentedMap:
    """Read metadata file - preserving original format if it exists.

    Return: The parsed metadata as a CommentedMap. An empty CommentedMap
    is returned when the metadata file does not exist, or when it exists
    but contains no YAML document (e.g. an empty or comment-only file).
    """
    if not self.filepath.exists():
        return CommentedMap()

    # Use default ruamel YAML to preserve comments and multi-line
    # strings
    metadata = YAML().load(self.filepath)

    # An empty file loads as None - callers expect a mapping they can
    # call .get() on and add fields to, so fall back to an empty map
    if metadata is None:
        return CommentedMap()
    return metadata
def new_experiment_name(self, ignore_uuid: bool = False) -> str:
    """Generate a new experiment name.

    The name is the control directory name, followed by the branch name
    (unless the branch is unknown, 'main' or 'master') and, unless
    ignore_uuid is set, a truncated experiment UUID.

    Parameters:
        ignore_uuid: bool, default False
            Leave the truncated UUID out of the generated name.

    Return: The generated experiment name
    """
    if self.branch is None:
        # self.repo is only created when metadata is enabled, but this
        # method is also reached with metadata disabled
        # (ignore_uuid=True). Without a repository the branch stays
        # unknown and is left out of the name, rather than raising an
        # AttributeError.
        repo = getattr(self, "repo", None)
        if repo is not None:
            self.branch = repo.get_branch_name()

    # Add branch and a truncated uuid to control directory name
    adding_branch = self.branch not in (None, 'main', 'master')
    suffix = f'-{self.branch}' if adding_branch else ''

    if not ignore_uuid:
        truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH]
        suffix += f'-{truncated_uuid}'

    return self.control_path.name + suffix
self.experiment_name) + return + + if ignore_uuid: + # Leave experiment UUID out of experiment name + self.experiment_name = self.new_experiment_name(ignore_uuid=True) + return + + # Branch-UUID experiment name and archive path + branch_uuid_experiment_name = self.new_experiment_name() + archive_path = self.lab_archive_path / branch_uuid_experiment_name + + # Legacy experiment name and archive path + legacy_name = self.control_path.name + legacy_archive_path = self.lab_archive_path / legacy_name + + if is_new_experiment or archive_path.exists(): + # Use branch-UUID aware experiment name + self.experiment_name = branch_uuid_experiment_name + elif legacy_archive_path.exists(): + # Use legacy CONTROL-DIR experiment name + self.experiment_name = legacy_name + print(f"Pre-existing archive found at: {legacy_archive_path}. " + f"Experiment name will remain: {legacy_name}") + self.branch_uuid_experiment = False + elif keep_uuid: + # Use same experiment UUID and use branch-UUID name for archive + self.experiment_name = branch_uuid_experiment_name + else: + # No archive exists - Detecting new experiment + warnings.warn( + "No pre-existing archive found. 
def set_new_uuid(self, is_new_experiment: bool = False) -> None:
    """Generate a new uuid and set experiment name.

    Parameters:
        is_new_experiment: bool, default False
            Passed through to set_experiment_name when (re)generating the
            experiment name for the new UUID.

    Return: None
    """
    self.uuid_updated = True
    self.uuid = generate_uuid()
    self.set_experiment_name(is_new_experiment=is_new_experiment)

    # If experiment name does not include UUID, leave it unchanged
    if not self.branch_uuid_experiment:
        return

    # Check experiment name is unique in local archive
    lab_archive_path = self.lab_archive_path
    if lab_archive_path.exists():
        # Compare directory *names*: experiment_name is a string, so
        # testing it against Path objects would never match and a name
        # collision would go undetected
        local_experiments = {item.name
                             for item in lab_archive_path.iterdir()
                             if item.is_dir()}
        while self.experiment_name in local_experiments:
            # Generate a new id and experiment name
            self.uuid = generate_uuid()
            self.set_experiment_name(is_new_experiment=is_new_experiment)
+ + Parameters: + restart_path: Optional[Path] + Prior restart path - used for finding parent metadata + set_template_values: bool, default False + Read schema and set metadata template values for new + experiments + parent_experiment: Optional[str] + Parent experiment UUID to add to generated metadata + + Return: None + + Note: This assumes setup() has been run to set UUID and experiment name + """ + if not self.enabled: + # Skip creating/updating/commiting metadata + return + + if self.uuid_updated: + # Update metadata if UUID has changed + restart_path = Path(restart_path) if restart_path else None + self.update_file(restart_path=restart_path, + set_template_values=set_template_values, + parent_experiment=parent_experiment) + self.commit_file() + + self.copy_to_archive() + + def update_file(self, + restart_path: Optional[Path] = None, + set_template_values: bool = False, + parent_experiment: Optional[str] = None) -> None: + """Write any updates to metadata file""" + metadata = self.read_file() + + # Add UUID field + metadata[UUID_FIELD] = self.uuid + + # Update parent UUID field + if parent_experiment is None: + parent_experiment = self.get_parent_experiment(restart_path) + if parent_experiment and parent_experiment != self.uuid: + metadata[PARENT_UUID_FIELD] = parent_experiment + + # Add extra fields if new branch-uuid experiment + # so to not over-write fields if it's a pre-existing legacy experiment + if self.branch_uuid_experiment: + metadata[CREATED_FIELD] = datetime.now().strftime('%Y-%m-%d') + metadata[NAME_FIELD] = self.experiment_name + metadata[MODEL_FIELD] = self.get_model_name() + + # Add origin git URL, if defined + url = self.repo.get_origin_url() + if url: + metadata[GIT_URL_FIELD] = url + + # Add email + contact if defined in git config + contact = self.repo.get_user_info(config_key='name') + if contact: + metadata[CONTACT_FIELD] = contact + + email = self.repo.get_user_info(config_key="email") + if email: + metadata[EMAIL_FIELD] = email + + if 
def get_parent_experiment(
        self, prior_restart_path: Optional[Path]) -> Optional[str]:
    """Search for a UUID in the metadata file located in the parent
    directory of the given restart.

    Parameters:
        prior_restart_path: Optional[Path]
            Path to the prior restart directory. The metadata file is
            looked up in the directory that contains it.

    Return: The UUID stored in that metadata file, or None if no restart
    path was given, no metadata file exists there, or the file has no
    UUID field.
    """
    if prior_restart_path is None:
        return None

    # Resolve to absolute path - wrap in Path first so that plain string
    # paths are accepted as well
    prior_restart_path = Path(prior_restart_path).resolve()

    # Check for pre-existing metadata file
    base_output_directory = prior_restart_path.parent
    metadata_filepath = base_output_directory / METADATA_FILENAME
    if not metadata_filepath.exists():
        return None

    # Read metadata file - an empty file loads as None
    parent_metadata = YAML().load(metadata_filepath)
    if parent_metadata is None:
        return None
    return parent_metadata.get(UUID_FIELD, None)
def get_schema_from_github():
    """Retrieve metadata schema from github.

    This is a best-effort lookup that requires internet access.

    Return: The parsed JSON schema as a dictionary, or an empty dictionary
    if the schema could not be retrieved (network error, timeout, non-200
    response or a response body that is not valid JSON).
    """
    try:
        # A timeout stops an unreachable host from blocking indefinitely
        response = requests.get(SCHEMA_URL, timeout=10)
        if response.status_code == 200:
            return response.json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # ValueError covers JSON decode failures in older requests versions
        print(f"Failed to fetch schema from {SCHEMA_URL}: {error}")
        return {}

    print(f"Failed to fetch schema from {SCHEMA_URL}")
    return {}
# Positional argument: target directory for `payu clone`
local_directory = {
    "flags": [],
    "parameters": {
        "dest": "local_directory",
        "help": "The directory to clone into",
    },
}

# Optional flag: keep a pre-existing experiment uuid
keep_uuid = {
    "flags": ("-k", "--keep-uuid"),
    "parameters": {
        "action": "store_true",
        "default": False,
        "dest": "keep_uuid",
        "help": "If an experiment uuid exists, leave it unchanged",
    },
}

# Optional argument: branch to clone and check out
clone_branch = {
    "flags": ("--branch", "-B"),
    "parameters": {
        "action": "store",
        "dest": "branch",
        "default": None,
        "help": "Clone and checkout this branch",
    },
}

# Optional argument: name of a branch to create after cloning
new_branch_name = {
    "flags": ("--new-branch", "-b"),
    "parameters": {
        "action": "store",
        "dest": "new_branch_name",
        "default": None,
        "help": "The name of the git branch to create and checkout",
    },
}

# Optional argument: UUID of the parent experiment
parent_experiment = {
    "flags": ("--parent-experiment", "-p"),
    "parameters": {
        "action": "store",
        "dest": "parent_experiment",
        "default": None,
        "help": "The parent experiment UUID to add to generated metadata",
    },
}

# Positional argument: branch to create/checkout
branch_name = {
    "flags": [],
    "parameters": {
        "dest": "branch_name",
        "help": "The name of the git branch to create/checkout",
    },
}

# Optional positional argument: commit the new branch starts from
start_point = {
    "flags": [],
    "parameters": {
        "nargs": "?",
        "dest": "start_point",
        "help": "The new branch head will point to this commit",
    },
}


# Optional argument: restart path to start the model run from
restart_path = {
    "flags": ("--restart", "-r"),
    "parameters": {
        "dest": "restart_path",
        "action": "store",
        "help": "The restart path from which to start the model run",
    },
}

# Optional flag: create a new branch on checkout
new_branch = {
    "flags": ["-b"],
    "parameters": {
        "dest": "new_branch",
        "action": "store_true",
        "default": False,
        "help": "Create new branch",
    },
}
def runcmd(config_path, verbose, remote):
    """Execute the `payu branch` command.

    Converts the configuration path (when given) to a Path and hands all
    arguments to list_branches.
    """
    if config_path is not None:
        config_path = Path(config_path)
    list_branches(config_path, verbose, remote)
def transform_strings_to_path(path_str=None):
    """Return path_str wrapped in a Path, or None when no value is given."""
    if path_str is None:
        return None
    return Path(path_str)
def runcmd(model_type, config_path, lab_path, keep_uuid,
           branch, repository, local_directory, new_branch_name, restart_path,
           parent_experiment):
    """Execute the `payu clone` command.

    Path-like string arguments are converted to Path objects (None is
    passed through unchanged) before everything is handed to clone.
    """
    clone(repository=repository,
          directory=transform_strings_to_path(local_directory),
          branch=branch,
          keep_uuid=keep_uuid,
          model_type=model_type,
          config_path=transform_strings_to_path(config_path),
          lab_path=transform_strings_to_path(lab_path),
          new_branch_name=new_branch_name,
          restart_path=transform_strings_to_path(restart_path),
          parent_experiment=parent_experiment)
The last output is protected""" - outputs = self.expt.list_output_dirs(output_type='output', - full_path=True) + outputs = list_archive_dirs(archive_path=self.expt.archive_path, + dir_type='output') + outputs = [os.path.join(self.expt.archive_path, output) + for output in outputs] if len(outputs) > 0: last_output = outputs.pop() if not self.ignore_last: @@ -69,8 +72,10 @@ def add_restarts_to_sync(self): return # Get sorted list of restarts in archive - restarts = self.expt.list_output_dirs(output_type='restart', - full_path=True) + restarts = list_archive_dirs(archive_path=self.expt.archive_path, + dir_type='restart') + restarts = [os.path.join(self.expt.archive_path, restart) + for restart in restarts] if restarts == []: return @@ -250,6 +255,12 @@ def run(self): self.source_paths.append(SourcePath(path=log_path, is_log_file=True)) + # Add metadata path to protected paths, if it exists + metadata_path = os.path.join(self.expt.archive_path, METADATA_FILENAME) + if os.path.isfile(metadata_path): + self.source_paths.append(SourcePath(path=metadata_path, + protected=True)) + # Add any additional paths to protected paths self.add_extra_source_paths() diff --git a/setup.py b/setup.py index 9b7b2dbc..ebdff68c 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,9 @@ 'yamanifest', 'dateutil', 'tenacity', - 'cftime' + 'cftime', + 'GitPython', + 'ruamel.yaml' ], install_requires=[ 'f90nml >= 0.16', @@ -45,7 +47,9 @@ 'requests[security]', 'python-dateutil', 'tenacity!=7.0.0', - 'cftime' + 'cftime', + 'GitPython >= 3.1.40', + 'ruamel.yaml >= 0.18.5' ], tests_require=[ 'pytest', @@ -59,6 +63,9 @@ 'payu-collate = payu.subcommands.collate_cmd:runscript', 'payu-profile = payu.subcommands.profile_cmd:runscript', 'payu-sync = payu.subcommands.sync_cmd:runscript', + 'payu-branch = payu.subcommands.branch_cmd:runscript', + 'payu-clone = payu.subcommands.clone_cmd:runscript', + 'payu-checkout = payu.subcommands.checkout_cmd:runscript' ] }, classifiers=[ diff --git a/test/common.py 
def write_metadata(metadata=metadata, path=metadata_path):
    """Write metadata to path as block-style YAML, replacing any
    existing file contents."""
    with path.open('w') as stream:
        # Dump straight into the open file instead of building the
        # string first
        yaml.dump(metadata, stream, default_flow_style=False)
def setup_control_repository(path=ctrldir, set_config=True):
    """Initialise a git repository at path with a single commit and
    return it.

    With set_config the module-level test config is written to
    config.yaml before committing; otherwise an empty placeholder file is
    created so there is still something to commit.
    """
    if not set_config:
        (path / "newFile").touch()
    else:
        write_config(config, path=(path / "config.yaml"))

    # Initialise the control repo and commit everything in it
    control_repo = git.Repo.init(path)
    control_repo.index.add("*")
    control_repo.index.commit("First commit - initialising repository")
    return control_repo
def test_check_restart_with_non_existent_restart():
    """A restart path that does not exist warns and yields None"""
    missing_restart = tmpdir / "restartDNE"

    warning_text = (f"Given restart path {missing_restart} does not exist. "
                    f"Skipping setting 'restart' in config file")

    with cd(ctrldir), pytest.warns(UserWarning, match=warning_text):
        checked_path = check_restart(missing_restart, labdir / "archive")

    assert checked_path is None
def check_metadata(expected_uuid,
                   expected_experiment,
                   expected_parent_uuid=None,
                   metadata_file=metadata_path):
    """Helper function to read metadata file and assert changed as expected.

    Checks the UUID and parent UUID stored in metadata_file, and that an
    identical copy of the metadata exists in the archive directory of
    expected_experiment.
    """
    assert metadata_file.exists()
    metadata = YAML().load(metadata_file)
    assert metadata.get("experiment_uuid", None) == expected_uuid
    assert metadata.get("parent_experiment", None) == expected_parent_uuid

    # Assert archive exists for experiment name
    archived_metadata_file = archive_dir / expected_experiment / "metadata.yaml"
    assert archived_metadata_file.exists()

    # Load the archived copy (not the control directory file again) so the
    # comparison actually verifies what was copied to the archive
    copied_metadata = YAML().load(archived_metadata_file)
    assert copied_metadata == metadata
Experiment UUID: {expected_uuid}" + assert repo.head.commit.message == expected_commit_msg + + +@patch("uuid.uuid4") +def test_checkout_branch(mock_uuid): + repo = setup_control_repository() + + # Mock uuid1 value + uuid1 = "8ddc1985-b7d0-4d4d-884f-061ecd90d478" + mock_uuid.return_value = uuid1 + + with cd(ctrldir): + # Test checkout new branch (with no existing metadata) + checkout_branch(branch_name="Branch1", + is_new_branch=True, + lab_path=labdir) + + # Check current branch, new commit was added, and metadata created + branch1_experiment_name = f"{ctrldir_basename}-Branch1-8ddc1985" + check_branch_metadata(repo, + expected_uuid=uuid1, + expected_current_branch="Branch1", + expected_experiment=branch1_experiment_name) + + # Save commit hash to check later on + branch_1_commit_hash = repo.active_branch.object.hexsha + + # Mock uuid2 value + uuid2 = "2de5b001-df08-4c0b-ab15-f47f8ad72929" + mock_uuid.return_value = uuid2 + + with cd(ctrldir): + # Test checkout new branch from branch with existing metadata + checkout_branch(branch_name="Branch2", + is_new_branch=True, + lab_path=labdir) + + # Check current branch, new commit was added, and metadata created + branch2_experiment_name = f"{ctrldir_basename}-Branch2-2de5b001" + check_branch_metadata(repo, + expected_uuid=uuid2, + expected_current_branch="Branch2", + expected_experiment=branch2_experiment_name) + + # Mock uuid3 value + uuid3 = "98c99f06-260e-42cc-a23f-f113fae825e5" + mock_uuid.return_value = uuid3 + + with cd(ctrldir): + # Test checkout new branch from starting branch with existing metadata + checkout_branch(branch_name="Branch3", + is_new_branch=True, + start_point="Branch1", + lab_path=labdir) + + # Check current branch, new commit was added, and metadata created + branch3_experiment_name = f"{ctrldir_basename}-Branch3-98c99f06" + check_branch_metadata(repo, + expected_uuid=uuid3, + expected_current_branch="Branch3", + expected_experiment=branch3_experiment_name) + + # Check second to last commit was 
@patch("uuid.uuid4")
def test_checkout_existing_branch_with_no_metadata(mock_uuid):
    """Checking out an existing branch without metadata warns, then
    creates and commits metadata with a new UUID"""
    control_repo = setup_control_repository()

    # Branch created directly with git, so it carries no payu metadata
    control_repo.create_head("Branch1")

    # Fix the UUID that will be generated
    new_uuid = "574ea2c9-2379-4484-86b4-1d1a0f820773"
    mock_uuid.return_value = new_uuid

    missing_uuid_warning = (
        "No experiment uuid found in metadata. Generating a new uuid"
    )

    with cd(ctrldir), pytest.warns(MetadataWarning,
                                   match=missing_uuid_warning):
        checkout_branch(branch_name="Branch1",
                        lab_path=labdir)

    # Metadata should now exist and be committed on Branch1
    check_branch_metadata(
        control_repo,
        expected_uuid=new_uuid,
        expected_current_branch="Branch1",
        expected_experiment=f"{ctrldir_basename}-Branch1-574ea2c9",
    )
Generating a new uuid" + ) + + with cd(ctrldir): + # Test checkout existing branch (with no existing metadata) + # and with pre-existing archive + with pytest.warns(MetadataWarning, match=expected_no_uuid_msg): + checkout_branch(branch_name="Branch1", + lab_path=labdir) + + # Check metadata was created and committed - with legacy experiment name + branch1_experiment_name = ctrldir_basename + check_branch_metadata(repo, + expected_uuid=uuid1, + expected_current_branch="Branch1", + expected_experiment=branch1_experiment_name) + + +@patch("uuid.uuid4") +def test_checkout_new_branch_existing_legacy_archive(mock_uuid): + # Using payu checkout new branch should generate new uuid, + # and experiment name - even if there's a legacy archive + repo = setup_control_repository() + + # Add archive under legacy name + restart_path = Path(make_expt_archive_dir(type="restart")) + + # Mock uuid1 value + uuid1 = "d4437aae-8370-4567-a698-94d00ba87cdc" + mock_uuid.return_value = uuid1 + + with cd(ctrldir): + # Test checkout new branch (with no existing metadata) + checkout_branch(branch_name="Branch1", + is_new_branch=True, + restart_path=restart_path, + config_path=config_path, + lab_path=labdir) + + # Check metadata was created and committed - with branch-uuid aware name + branch1_experiment_name = f"{ctrldir_basename}-Branch1-d4437aae" + check_branch_metadata(repo, + expected_uuid=uuid1, + expected_current_branch="Branch1", + expected_experiment=branch1_experiment_name) + + # Check restart path was added to configuration file + config = read_config(config_path) + assert config["restart"] == str(restart_path) + + +def test_checkout_branch_with_no_config(): + # Initialise a control repo with no config + repo = setup_control_repository(set_config=False) + + repo.create_head("Branch1") + + with cd(ctrldir): + # Test checkout branch that has no config raises error + with pytest.raises(FileNotFoundError): + checkout_branch(branch_name="Branch1", + lab_path=labdir) + + assert not 
metadata_path.exists() + + +@patch("uuid.uuid4") +def test_checkout_branch_with_restart_path(mock_uuid): + # Make experiment archive restart - starting with no metadata + restart_path = tmpdir / "remote_archive" / "restart0123" + restart_path.mkdir(parents=True) + + # Setup repo + repo = setup_control_repository() + + # Mock uuid1 value + uuid1 = "df050eaf-c8bb-4b10-9998-e0202a1eabd2" + mock_uuid.return_value = uuid1 + + with cd(ctrldir): + # Test checkout with restart path with no metadata + checkout_branch(is_new_branch=True, + branch_name="Branch1", + lab_path=labdir, + restart_path=restart_path) + + # Check metadata + experiment1_name = f"{ctrldir_basename}-Branch1-df050eaf" + check_branch_metadata(repo, + expected_current_branch='Branch1', + expected_uuid=uuid1, + expected_experiment=experiment1_name) + + # Create restart directory in Branch1 archive + restart_path = archive_dir / experiment1_name / 'restart0123' + restart_path.mkdir() + + # Mock uuid2 value + uuid2 = "9cc04c9b-f13d-4f1d-8a35-87146a4381ef" + mock_uuid.return_value = uuid2 + + with cd(ctrldir): + # Test checkout with restart path with metadata + checkout_branch(is_new_branch=True, + branch_name="Branch2", + lab_path=labdir, + restart_path=restart_path) + + # Check metadata - Check parent experiment is experiment 1 UUID + experiment2_name = f"{ctrldir_basename}-Branch2-9cc04c9b" + check_branch_metadata(repo, + expected_current_branch='Branch2', + expected_uuid=uuid2, + expected_experiment=experiment2_name, + expected_parent_uuid=uuid1) + + +@patch("uuid.uuid4") +def test_clone(mock_uuid): + # Create a repo to clone + source_repo_path = tmpdir / "sourceRepo" + source_repo_path.mkdir() + source_repo = setup_control_repository(path=source_repo_path) + source_main_branch = str(source_repo.active_branch) + + # Create and checkout branch + branch1 = source_repo.create_head("Branch1") + branch1.checkout() + + # Mock uuid1 value + uuid1 = "9cc04c9b-f13d-4f1d-8a35-87146a4381ef" + mock_uuid.return_value = 
uuid1 + + # Test clone + cloned_repo_path = tmpdir / "clonedRepo" + clone(str(source_repo_path), cloned_repo_path, lab_path=labdir) + + # Check new commit added and expected metadata + cloned_repo = git.Repo(cloned_repo_path) + metadata_file = cloned_repo_path / "metadata.yaml" + check_branch_metadata(repo=cloned_repo, + expected_current_branch="Branch1", + expected_uuid=uuid1, + expected_experiment="clonedRepo-Branch1-9cc04c9b", + metadata_file=metadata_file) + branch_1_commit_hash = cloned_repo.active_branch.object.hexsha + + cloned_repo.git.checkout(source_main_branch) + + # Test clone of a clone - adding a new branch + uuid2 = "fd7b4804-d306-4a18-9d95-a8f565abfc9a" + mock_uuid.return_value = uuid2 + + # Run clone + with cd(tmpdir): + clone(str(cloned_repo_path), Path("clonedRepo2"), + lab_path=labdir, new_branch_name="Branch2", branch="Branch1", + parent_experiment=uuid1) + + # Check new commit added and expected metadata + cloned_repo2 = git.Repo(tmpdir / "clonedRepo2") + metadata_file = tmpdir / "clonedRepo2" / "metadata.yaml" + check_branch_metadata(repo=cloned_repo2, + expected_current_branch="Branch2", + expected_uuid=uuid2, + expected_experiment="clonedRepo2-Branch2-fd7b4804", + expected_parent_uuid=uuid1, + metadata_file=metadata_file) + + # Check branched from Branch1 + second_latest_commit = list(cloned_repo2.iter_commits(max_count=2))[1] + assert second_latest_commit.hexsha == branch_1_commit_hash + + # Check local branches + assert [head.name for head in cloned_repo2.heads] == ["Branch1", "Branch2"] + + +def add_and_commit_metadata(repo, metadata): + """Helper function to create/update metadata file and commit""" + metadata_path = ctrldir / "metadata.yaml" + YAML().dump(metadata, metadata_path) + repo.index.add("*") + repo.index.commit("Updated metadata.yaml") + + +def test_list_branches(capsys): + # Create repo and a few branches with and without metadata files + repo = setup_control_repository(set_config=False) + # Leave main branch with no 
metadata file + main_branch_name = str(repo.active_branch) + + # Branch 1 - has config but no metadata + branch1 = repo.create_head("Branch1") + branch1.checkout() + write_config(config) + repo.index.add("*") + repo.index.commit("Added config.yaml") + + # Checkout and add metadata to new branch + branch2 = repo.create_head("Branch2") + branch2.checkout() + write_config(config) + branch_2_metadata = { + "experiment_uuid": "b12345678", + } + add_and_commit_metadata(repo, branch_2_metadata) + + # New branch with no uuid in metadata + branch3 = repo.create_head("Branch3") + branch3.checkout() + branch_3_metadata = { + "email": "test@email.com", + "contact": "TestUser" + } + add_and_commit_metadata(repo, branch_3_metadata) + + # Test list branches + with cd(ctrldir): + list_branches() + + expected_printed_output = f"""* Current Branch: Branch3 + No UUID in metadata file +Branch: Branch1 + No metadata file found +Branch: Branch2 + experiment_uuid: b12345678 +Branch: {main_branch_name} + No config file found""" + captured = capsys.readouterr() + assert captured.out.strip() == expected_printed_output + + # Test list branches with verbose set + with cd(ctrldir): + list_branches(verbose=True) + + expected_verbose_output = f"""* Current Branch: Branch3 + email: test@email.com + contact: TestUser +Branch: Branch1 + No metadata file found +Branch: Branch2 + experiment_uuid: b12345678 +Branch: {main_branch_name} + No config file found""" + captured = capsys.readouterr() + assert captured.out.strip() == expected_verbose_output + + # Test remote branches + cloned_repo_path = tmpdir / "cloned_repo" + repo.clone(cloned_repo_path) + + with cd(cloned_repo_path): + list_branches(remote=True) + expected_remote_output = f"""* Current Branch: Branch3 + No UUID in metadata file +Remote Branch: Branch1 + No metadata file found +Remote Branch: Branch2 + experiment_uuid: b12345678 +Remote Branch: Branch3 + No UUID in metadata file +Remote Branch: HEAD + No UUID in metadata file +Remote 
Branch: {main_branch_name} + No config file found""" + captured = capsys.readouterr() + assert captured.out.strip() == expected_remote_output diff --git a/test/test_git_utils.py b/test/test_git_utils.py new file mode 100644 index 00000000..fc8ae927 --- /dev/null +++ b/test/test_git_utils.py @@ -0,0 +1,206 @@ +import shutil +import subprocess + +import git +import pytest + +from payu.git_utils import get_git_repository, GitRepository +from payu.git_utils import PayuBranchError, PayuGitWarning + +from test.common import tmpdir + + +@pytest.fixture(autouse=True) +def setup_and_teardown(): + # Create tmp directory + try: + tmpdir.mkdir() + except Exception as e: + print(e) + + yield + + # Remove tmp directory + try: + shutil.rmtree(tmpdir) + except Exception as e: + print(e) + + +def create_new_repo(repo_path): + """Helper function to initialise a repo and create first commit""" + repo = git.Repo.init(repo_path) + init_file = repo_path / "init.txt" + add_file_and_commit(repo, init_file) + return repo + + +def add_file_and_commit(repo, file_path, commit_no=0): + """Helper function to add a commit to repo""" + file_path.touch() + repo.index.add([file_path]) + repo.index.commit(f"Add commit {commit_no}") + return repo + + +def test_get_git_repo_invalid_repo_initialise(): + invalid_repo_path = tmpdir / "invalidRepo" + invalid_repo_path.mkdir() + repo = get_git_repository(invalid_repo_path, initialise=True) + assert not repo.bare + + +def test_get_git_repo_invalid_repo_catch_error(): + invalid_path = tmpdir / "invalidRepo" + invalid_path.mkdir() + expected_warning_msg = "Path is not a valid git repository: " + expected_warning_msg += str(invalid_path) + with pytest.warns(PayuGitWarning, match=expected_warning_msg): + repo = get_git_repository(invalid_path, catch_error=True) + assert repo is None + + +def test_get_git_user_info_no_config_set(): + # Testing this is tricky as don't want to remove any global configs for + # name or email. 
Instead using assumption that key 'testKey-54321' is not + # defined in the 'user' namespace. + repo_path = tmpdir / "test_repo" + create_new_repo(repo_path) + repo = GitRepository(repo_path) + value = repo.get_user_info('testKey-54321') + assert value is None + + +def test_get_git_user_info_config_set(): + repo_path = tmpdir / "test_repo" + create_new_repo(repo_path) + try: + # Set config that is local to temporary test repository only + subprocess.run('git config user.name "TestUserName"', + check=True, + shell=True, + cwd=repo_path) + print("User name set successfully.") + except subprocess.CalledProcessError as e: + print(f"Error setting user name: {e}") + + repo = GitRepository(repo_path) + value = repo.get_user_info('name') + + assert value == 'TestUserName' + + +@pytest.mark.parametrize("ref", ["branch", "hash", None]) +def test_git_checkout_new_branch_from_remote_ref(ref): + # Setup + remote_repo_path = tmpdir / 'remoteRepo' + remote_repo = create_new_repo(remote_repo_path) + main_branch = remote_repo.active_branch + main_branch_hash = main_branch.object.hexsha + + # Create branch_1 + branch_1 = remote_repo.create_head("branch-1") + remote_repo.git.checkout(branch_1) + add_file_and_commit(remote_repo, (remote_repo_path / 'file'), commit_no=1) + branch_1_hash = branch_1.object.hexsha + + assert main_branch_hash != branch_1_hash + + # Re-checkout main branch + remote_repo.git.checkout(main_branch) + + # Clone repo + cloned_repo_path = tmpdir / 'cloned_repo' + cloned_repo = remote_repo.clone(cloned_repo_path) + + if ref == "hash": + start_point = branch_1_hash + expected_hash = branch_1_hash + elif ref == "branch": + start_point = "branch-1" + expected_hash = branch_1_hash + else: + start_point = None + expected_hash = main_branch_hash + + # Test startpoint being remote branch/hash/None + repo = GitRepository(cloned_repo_path) + repo.checkout_branch('branch-2', + new_branch=True, + start_point=start_point) + + current_branch = cloned_repo.active_branch + 
current_hash = current_branch.object.hexsha + assert str(current_branch) == 'branch-2' + assert current_hash == expected_hash + + +def test_git_checkout_new_branch_existing(): + # Setup + repo_path = tmpdir / 'remoteRepo' + repo = create_new_repo(repo_path) + existing_branch = repo.active_branch + + # Test checkout branch with existing branch + repo = GitRepository(repo_path) + with pytest.raises(PayuBranchError): + repo.checkout_branch(str(existing_branch), + new_branch=True) + + +def test_git_checkout_non_existent_branch(): + # Setup + repo_path = tmpdir / 'remoteRepo' + create_new_repo(repo_path) + + # Test checkout branch with non-existent branch + repo = GitRepository(repo_path) + with pytest.raises(PayuBranchError): + repo.checkout_branch("Gibberish") + + +def test_git_checkout_staged_changes(): + # Setup + repo_path = tmpdir / 'remoteRepo' + create_new_repo(repo_path) + + repo = GitRepository(repo_path) + file_path = repo_path / 'newTestFile.txt' + file_path.touch() + + # Test checkout branch works with untracked files + repo.checkout_branch(new_branch=True, branch_name="NewBranch") + + # Test checkout raises error with staged changes + repo.repo.index.add([file_path]) + with pytest.raises(PayuBranchError): + repo.checkout_branch(new_branch=True, branch_name="NewBranch2") + + +def test_git_checkout_existing_branch(): + # Setup + remote_repo_path = tmpdir / 'remoteRepo' + remote_repo = create_new_repo(remote_repo_path) + main_branch = remote_repo.active_branch + + # Create branch_1 + branch_1 = remote_repo.create_head("branch-1") + remote_repo.git.checkout(branch_1) + add_file_and_commit(remote_repo, (remote_repo_path / 'file'), commit_no=1) + branch_1_hash = branch_1.object.hexsha + + # Re-checkout main branch + remote_repo.git.checkout(main_branch) + + # Clone repo + cloned_repo_path = tmpdir / 'cloned_repo' + cloned_repo = remote_repo.clone(cloned_repo_path) + + # Test checkout existing remote branch + repo = GitRepository(cloned_repo_path) + 
repo.checkout_branch('branch-1') + + current_branch = cloned_repo.active_branch + current_hash = current_branch.object.hexsha + assert str(current_branch) == 'branch-1' + assert current_hash == branch_1_hash diff --git a/test/test_metadata.py b/test/test_metadata.py new file mode 100644 index 00000000..b30adb9c --- /dev/null +++ b/test/test_metadata.py @@ -0,0 +1,396 @@ +import copy +import shutil +from datetime import datetime + +import pytest +from unittest.mock import patch, Mock +from ruamel.yaml import YAML + +from payu.metadata import Metadata, MetadataWarning + +from test.common import cd +from test.common import tmpdir, ctrldir, labdir, archive_dir +from test.common import config as config_orig +from test.common import write_config + +verbose = True + +# Global config - Remove set experiment and metadata config +config = copy.deepcopy(config_orig) +config.pop("experiment") +config.pop("metadata") + +pytestmark = pytest.mark.filterwarnings( + "ignore::payu.git_utils.PayuGitWarning") + + +def setup_module(module): + """ + Put any test-wide setup code in here, e.g. creating test files + """ + if verbose: + print("setup_module module:%s" % module.__name__) + + try: + tmpdir.mkdir() + except Exception as e: + print(e) + + +def teardown_module(module): + """ + Put any test-wide teardown code in here, e.g. 
removing test outputs + """ + if verbose: + print("teardown_module module:%s" % module.__name__) + + try: + shutil.rmtree(tmpdir) + print('removing tmp') + except Exception as e: + print(e) + + +def mocked_get_git_user_info(config_key): + if config_key == 'name': + return 'mockUser' + elif config_key == 'email': + return 'mock@email.com' + else: + return None + + +@pytest.fixture(autouse=True) +def setup_and_teardown(): + try: + ctrldir.mkdir() + labdir.mkdir() + except Exception as e: + print(e) + + yield + + try: + shutil.rmtree(ctrldir) + shutil.rmtree(labdir) + except Exception as e: + print(e) + + +@patch("payu.metadata.GitRepository") +@pytest.mark.parametrize( + "uuid, legacy_archive_exists, previous_metadata, expected_metadata", + [ + # Test new metadata file created + ( + "b1f3ce3d-99da-40e4-849a-c8b352948a31", + False, + None, + { + "experiment_uuid": "b1f3ce3d-99da-40e4-849a-c8b352948a31", + "created": '2000-01-01', + "name": "DefaultExperimentName", + "model": "TEST-MODEL", + "url": "mockUrl", + "contact": "mockUser", + "email": "mock@email.com" + } + ), + # Test metadata file updated when new UUID + ( + "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + False, + { + "experiment_uuid": "b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04", + "contact": "Add your name here", + "email": "Add your email address here", + "description": "Add description here", + }, + { + "experiment_uuid": "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + "description": "Add description here", + "created": '2000-01-01', + "name": "DefaultExperimentName", + "model": "TEST-MODEL", + "url": "mockUrl", + "contact": "mockUser", + "email": "mock@email.com" + } + ), + # Test extra fields not added with legacy experiments + ( + "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + True, + { + "experiment_uuid": "0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75", + "name": "UserDefinedExperimentName", + "contact": "TestUser", + "email": "Test@email.com" + }, + { + "experiment_uuid": "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + "name": 
"UserDefinedExperimentName", + "contact": "TestUser", + "email": "Test@email.com" + } + ), + ] +) +def test_update_file(mock_repo, uuid, legacy_archive_exists, + previous_metadata, expected_metadata): + # Create pre-existing metadata file + metadata_path = ctrldir / 'metadata.yaml' + yaml = YAML() + if previous_metadata is not None: + with open(metadata_path, 'w') as file: + yaml.dump(previous_metadata, file) + + # Add mock git values + mock_repo.return_value.get_origin_url.return_value = "mockUrl" + mock_repo.return_value.get_user_info.side_effect = mocked_get_git_user_info + + # Setup config + test_config = config.copy() + test_config['model'] = "test-model" + write_config(test_config) + + # Initialise Metadata + with cd(ctrldir): + metadata = Metadata(archive_dir) + metadata.uuid = uuid + metadata.experiment_name = "DefaultExperimentName" + metadata.branch_uuid_experiment = not legacy_archive_exists + + # Mock datetime (for created date) + with patch('payu.metadata.datetime') as mock_date: + mock_date.now.return_value = datetime(2000, 1, 1) + + # Function to test + metadata.update_file() + + assert metadata_path.exists and metadata_path.is_file + + with open(metadata_path, 'r') as file: + metadata = yaml.load(metadata_path) + + assert metadata == expected_metadata + + +@pytest.mark.parametrize( + "uuid_exists, keep_uuid, is_new_experiment, " + "branch_uuid_archive_exists, legacy_archive_exists, catch_warning," + "expected_uuid, expected_name", + [ + # Keep UUID on new experiment - UUID Exists - no archives exist + ( + True, True, True, False, False, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl-mock_branch-3d18b3b6" + ), + # Keep UUID on new experiment - UUID Exists - legacy archive exists + ( + True, True, True, False, True, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl-mock_branch-3d18b3b6" + ), + # Keep UUID on not new experiement - UUID Exists -legacy archive exists + ( + True, True, False, False, True, False, + 
"3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl" + ), + # Keep UUID on not new experiment - No UUID - no archives exist + ( + False, True, True, False, False, False, + "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", "ctrl-mock_branch-cb793e91" + ), + # Experiment setup - No UUID - legacy archive exists + ( + False, False, False, False, True, True, + "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", "ctrl" + ), + # Experiment setup - No UUID - no archive exists + ( + False, False, False, False, False, True, + "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", "ctrl-mock_branch-cb793e91" + ), + # Experiment setup - Existing UUID - legacy archive exists + ( + True, False, False, False, True, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl" + ), + # Experiment setup - Existing UUID - new archive exists + ( + True, False, False, True, True, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl-mock_branch-3d18b3b6" + ), + ] +) +def test_set_experiment_and_uuid(uuid_exists, keep_uuid, is_new_experiment, + branch_uuid_archive_exists, + legacy_archive_exists, catch_warning, + expected_uuid, expected_name): + # Setup config and metadata + write_config(config) + with cd(ctrldir): + metadata = Metadata(archive_dir) + + if uuid_exists: + metadata.uuid = "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136" + + if branch_uuid_archive_exists: + archive_path = archive_dir / "ctrl-mock_branch-3d18b3b6" + archive_path.mkdir(parents=True) + + if legacy_archive_exists: + archive_path = archive_dir / "ctrl" + archive_path.mkdir(parents=True) + + # Test set UUID and experiment name + with patch('payu.metadata.GitRepository.get_branch_name') as mock_branch, \ + patch('uuid.uuid4') as mock_uuid: + mock_branch.return_value = "mock_branch" + mock_uuid.return_value = "cb793e91-6168-4ed2-a70c-f6f9ccf1659b" + + if catch_warning: + # Test warning raised + with pytest.warns(MetadataWarning): + metadata.setup(is_new_experiment=is_new_experiment, + keep_uuid=keep_uuid) + else: + metadata.setup(is_new_experiment=is_new_experiment, + 
keep_uuid=keep_uuid) + + assert metadata.experiment_name == expected_name + assert metadata.uuid == expected_uuid + + +def test_set_configured_experiment_name(): + # Set experiment in config file + test_config = copy.deepcopy(config) + test_config['experiment'] = "configuredExperiment" + write_config(test_config) + + with cd(ctrldir): + metadata = Metadata(archive_dir) + + # Test configured experiment name is always the set experiment name + metadata.set_experiment_name() + assert metadata.experiment_name == "configuredExperiment" + + metadata.set_experiment_name(is_new_experiment=True) + assert metadata.experiment_name == "configuredExperiment" + + +@pytest.mark.parametrize( + "branch, expected_name", + [(None, "ctrl-cb793e91"), + ("main", "ctrl-cb793e91"), + ("master", "ctrl-cb793e91"), + ("branch", "ctrl-branch-cb793e91")] +) +def test_new_experiment_name(branch, expected_name): + # Test configured experiment name is the set experiment name + with cd(ctrldir): + metadata = Metadata(archive_dir) + + metadata.uuid = "cb793e91-6168-4ed2-a70c-f6f9ccf1659b" + + with patch('payu.metadata.GitRepository.get_branch_name') as mock_branch: + mock_branch.return_value = branch + experiment = metadata.new_experiment_name() + + assert experiment == expected_name + + +@pytest.mark.parametrize( + "branch, expected_name", + [(None, "ctrl"), + ("main", "ctrl"), + ("branch", "ctrl-branch")] +) +def test_new_experiment_name_ignore_uuid(branch, expected_name): + # Test configured experiment name is the set experiment name + with cd(ctrldir): + metadata = Metadata(archive_dir) + + with patch('payu.metadata.GitRepository.get_branch_name') as mock_branch: + mock_branch.return_value = branch + experiment = metadata.new_experiment_name(ignore_uuid=True) + + assert experiment == expected_name + + +@patch("payu.metadata.GitRepository") +def test_update_file_with_template_metadata_values(mock_repo): + # Leave out origin URL and git user info + 
mock_repo.return_value.get_origin_url.return_value = None + mock_repo.return_value.get_user_info.return_value = None + + # Setup config + test_config = config.copy() + test_config['model'] = "test-model" + write_config(test_config) + + # Initialise Metadata and UUID and experiment name + with cd(ctrldir): + metadata = Metadata(archive_dir) + metadata.experiment_name = "ctrldir-branch-cb793e91" + metadata.uuid = "cb793e91-6168-4ed2-a70c-f6f9ccf1659" + + with patch('requests.get') as mock_get: + # Mock request for json schema + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the experiment (string)" + }, + "experiment_uuid": { + "type": "string", + "format": "uuid", + "description": "Unique uuid for the experiment (string)" + }, + "description": { + "type": "string", + "description": ("Short description of the experiment " + "(string, < 150 char)") + }, + "long_description": { + "type": "string", + "description": ("Long description of the experiment " + "(string)") + }, + "model": { + "type": "array", + "items": {"type": ["string", "null"]}, + "description": ("The name(s) of the model(s) used in the" + " experiment (string)") + }, + }, + "required": [ + "name", + "experiment_uuid", + "description", + "long_description" + ] + } + mock_get.return_value = mock_response + + # Mock datetime (for created date) + with patch('payu.metadata.datetime') as mock_date: + mock_date.now.return_value = datetime(2000, 1, 1) + + # Test function + metadata.update_file(set_template_values=True) + + # Expect commented template values for non-null fields + expected_metadata = """experiment_uuid: cb793e91-6168-4ed2-a70c-f6f9ccf1659 +created: '2000-01-01' +name: ctrldir-branch-cb793e91 +model: TEST-MODEL +description: # Short description of the experiment (string, < 150 char) 
+long_description: # Long description of the experiment (string) +""" + assert (ctrldir / 'metadata.yaml').read_text() == expected_metadata diff --git a/test/test_payu.py b/test/test_payu.py index 8a6d4065..b1184fc5 100644 --- a/test/test_payu.py +++ b/test/test_payu.py @@ -264,4 +264,40 @@ def test_lib_update_if_nci_module_not_required(): 'libmpi_usempif08.so.40': '/$HOME/exe/spack-microarchitectures.git/opt/spack/linux-rocky8-cascadelake/intel-2019.5.281/openmpi-4.1.5-ooyg5wc7sa3tvmcpazqqb44pzip3wbyo/lib/libmpi_usempif08.so.40', } result = payu.envmod.lib_update(required_libs_dict, 'libmpi.so') - assert(result == '') \ No newline at end of file + assert (result == '') + + +def test_list_archive_dirs(): + # Create archive directories - mix of valid/invalid names + archive_dirs = [ + 'output000', 'output1001', 'output023', + 'output', 'Output001', 'output1', + 'Restart', 'restart2', 'restart', + 'restart102932', 'restart021', 'restart001', + ] + tmp_archive = tmpdir / 'test_archive' + for dir in archive_dirs: + (tmp_archive / dir).mkdir(parents=True) + + # Add some files + (tmp_archive / 'restart005').touch() + (tmp_archive / 'output005').touch() + + # Add a restart symlink + tmp_archive_2 = tmpdir / 'test_archive_2' + source_path = tmp_archive_2 / 'restart999' + source_path.mkdir(parents=True) + (tmp_archive / 'restart23042').symlink_to(source_path) + + # Test list output dirs and with string archive path + outputs = payu.fsops.list_archive_dirs(str(tmp_archive), dir_type="output") + assert outputs == ['output000', 'output023', 'output1001'] + + # Test list restarts + restarts = payu.fsops.list_archive_dirs(tmp_archive, dir_type="restart") + assert restarts == ['restart001', 'restart021', + 'restart23042', 'restart102932'] + + # Clean up test archive + shutil.rmtree(tmp_archive) + shutil.rmtree(tmp_archive_2) diff --git a/test/test_sync.py b/test/test_sync.py index e1d27a31..766480ec 100644 --- a/test/test_sync.py +++ b/test/test_sync.py @@ -10,7 +10,7 @@ from 
test.common import tmpdir, ctrldir, labdir, expt_archive_dir from test.common import config as config_orig from test.common import write_config -from test.common import make_all_files, make_random_file +from test.common import make_all_files, make_random_file, write_metadata from test.common import make_expt_archive_dir verbose = True @@ -18,6 +18,11 @@ # Global config config = copy.deepcopy(config_orig) +# Enable metadata +config.pop('metadata') +pytestmark = pytest.mark.filterwarnings( + "ignore::payu.git_utils.PayuGitWarning") + def setup_module(module): """ @@ -37,6 +42,7 @@ def setup_module(module): labdir.mkdir() ctrldir.mkdir() make_all_files() + write_metadata() except Exception as e: print(e) @@ -285,7 +291,8 @@ def test_sync(): sync.run() expected_dirs_synced = {'output000', 'output001', 'output002', - 'output003', 'output004', 'pbs_logs'} + 'output003', 'output004', + 'pbs_logs', 'metadata.yaml'} # Test output is moved to remote dir assert set(os.listdir(remote_archive)) == expected_dirs_synced