Skip to content

Commit

Permalink
Start implementing dataregistry ci
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartmcalpine committed Nov 3, 2023
1 parent b655529 commit f756c9c
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 6 deletions.
31 changes: 30 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Continuous Integration

on:
push:
branches: [ master ]
branches: [ master, data-registry ]
pull_request:
branches: [ master ]

Expand Down Expand Up @@ -41,3 +41,32 @@ jobs:
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1

# CI job that prepares an environment for exercising ceci with the
# LSSTDESC dataregistry package.
test_data_registry:

  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Quoted so YAML keeps the version as a string rather than a float.
      python-version: ["3.9"]

  # A job may define only ONE `steps` list. The original declared `steps:`
  # twice, which is a duplicate mapping key: the workflow is rejected or the
  # first list (repository checkout + Python setup) is silently discarded.
  # All steps are therefore merged into this single list, in original order.
  steps:
    - name: Checkout repository
      uses: actions/checkout@v2
      with:
        submodules: true

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}

    # Checked out into a sub-directory so it does not overwrite the
    # main repository checkout above.
    - name: Checkout dataregistry repository
      uses: actions/checkout@v2
      with:
        repository: LSSTDESC/dataregistry
        path: './dataregistry'

    - name: Install
      run: |
        sudo apt-get update && sudo apt-get -y install libopenmpi-dev openmpi-bin graphviz graphviz-dev
        pip install .[all]
11 changes: 6 additions & 5 deletions ceci/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,12 +471,13 @@ def setup_data_registry(self, registry_config): #pragma: no cover
# None the dataregistry will assume the users config file is in the
# default location (~/.config_reg_access).
registry = DataRegistry(config_file=registry_config.get("config", None),
owner_type=registry_config.get("config", "user"),
owner=registry_config.get("owner", None))
owner_type=registry_config.get("owner_type", "user"),
owner=registry_config.get("owner", None),
root_dir=registry_config.get("root_dir", None))

if not os.environ.get("NERSC_HOST"):
warnings.warn("The Data Registry is only available on NERSC: not setting it up now.")
return None
#if not os.environ.get("NERSC_HOST"):
# warnings.warn("The Data Registry is only available on NERSC: not setting it up now.")
# return None

# Save the things that may be useful.
return {
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ test = [
"dask[distributed]",
]

test_dataregistry = [
"pyyaml > 3",
"psutil",
"dataregistry @ git+https://github.com/LSSTDESC/dataregistry",
]

all = [
"parsl >= 1.0.0",
"flask",
Expand Down
28 changes: 28 additions & 0 deletions tests/create_registry_entries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
import sys

from dataregistry import DataRegistry

# Root directory handed to the DataRegistry for this test setup.
# NOTE(review): this is a relative path, so it is resolved against the
# current working directory. tests/test_dataregistry.yml points at
# ./tests/DataRegistry_data, which suggests this script is meant to be run
# from inside tests/ -- confirm against the CI invocation.
_TEST_ROOT_DIR = "DataRegistry_data"

# Version string registered for every test dataset.
_TEST_VERSION = "0.0.1"

# Names of the test input files to register; each one is read from
# inputs/<name> (also relative to the current working directory).
_TEST_DATASETS = ("dm.txt", "fiducial_cosmology.txt")

# Make root dir. exist_ok=True replaces the separate isdir() check, which
# left a window between the check and the creation and did two filesystem
# lookups for the same result.
os.makedirs(_TEST_ROOT_DIR, exist_ok=True)

# Establish connection to database
datareg = DataRegistry(root_dir=_TEST_ROOT_DIR)

# Add one new entry per test input file, in the same order as before.
for dataset_name in _TEST_DATASETS:
    datareg.Registrar.register_dataset(
        dataset_name,
        _TEST_VERSION,
        verbose=True,
        old_location=f"inputs/{dataset_name}",
    )
94 changes: 94 additions & 0 deletions tests/test_dataregistry.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Python modules that are imported to find
# stage classes. Any stages imported in these
# modules are automatically detected and their names can
# be used below
modules: ceci_example

# The launcher to use
# These are defined in ceci/sites
launcher:
name: mini
interval: 0.5

# launcher:
# name: parsl
# # max_threads only referenced for local sites
# #log: parsl_log.txt

# launcher:
# name: cwl
# launcher: cwltool
# dir: ./test/cwl

site:
name: local
max_threads: 4
# max_threads: 4
# container: joezuntz/txpipe
# volume: $PWD:/opt/txpipe



#site:
# name: nersc-interactive
# # Put the log for the overall pipeline infrastructure in this file:
# pipeline_log: log.txt

# site:
# name: nersc-batch
# cpu_type: haswell
# queue: debug
# max_jobs: 2
# account: m1727
# walltime: "00:30:00"
# setup: /global/projecta/projectdirs/lsst/groups/WL/users/zuntz/setup-cori



# The list of stages to run and the number of processors
# to use for each.
stages:
- name: WLGCSummaryStatistic
nprocess: 1
threads_per_process: 2
- name: SysMapMaker
nprocess: 1
- name: shearMeasurementPipe
nprocess: 1
- name: PZEstimationPipe
nprocess: 1
- name: WLGCRandoms
nprocess: 1
- name: WLGCSelector
nprocess: 1
- name: SourceSummarizer
nprocess: 1
- name: WLGCTwoPoint
nprocess: 1
- name: WLGCCov
nprocess: 1

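# Definitions of where to find inputs for the overall pipeline.
# Any input required by a pipeline stage that is not generated by
# a previous stage must be defined here. They are listed by tag.
# In this file each tag maps to an `id` instead of a file path; this is
# presumably the data registry dataset id of the corresponding entry
# (see tests/create_registry_entries.py) -- TODO confirm.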
inputs:
DM:
id: 1
fiducial_cosmology:
id: 2

# Overall configuration file
config: ./tests/config.yml

# If all the outputs for a stage already exist then do not re-run that stage
resume: False

# Put all the output files in this directory:
output_dir: ./tests/outputs

# Put the logs from the individual stages in this directory:
log_dir: ./tests/logs

# Point to the root directory of the dataregistry
registry:
root_dir: ./tests/DataRegistry_data

0 comments on commit f756c9c

Please sign in to comment.