From 10e5d870e5ef59639da6879d1eefbb4833557201 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 11:10:30 -0500 Subject: [PATCH 1/8] [python/ci] Remote-storage CI YAML --- .github/workflows/libtiledb-ci.yml | 21 +-- .github/workflows/libtiledbsoma-asan-ci.yml | 21 +-- .github/workflows/python-ci-minimal.yml | 25 ++-- .../workflows/python-dependency-variation.yml | 19 +-- .github/workflows/python-remote-storage.yml | 104 +++++++++++++++ .../workflows/r-python-interop-testing.yml | 26 ++-- apis/python/remote_tests/README.md | 45 +++++++ apis/python/remote_tests/__init__.py | 3 + apis/python/remote_tests/conftest.py | 47 +++++++ apis/python/remote_tests/test_01_setup.py | 82 ++++++++++++ apis/python/remote_tests/test_02_analysis.py | 121 ++++++++++++++++++ apis/python/remote_tests/test_03_versions.py | 119 +++++++++++++++++ apis/python/remote_tests/test_04_todo.py | 64 +++++++++ apis/python/remote_tests/util.py | 62 +++++++++ apis/python/src/tiledbsoma/io/ingest.py | 2 + 15 files changed, 711 insertions(+), 50 deletions(-) create mode 100644 .github/workflows/python-remote-storage.yml create mode 100644 apis/python/remote_tests/README.md create mode 100644 apis/python/remote_tests/__init__.py create mode 100644 apis/python/remote_tests/conftest.py create mode 100644 apis/python/remote_tests/test_01_setup.py create mode 100644 apis/python/remote_tests/test_02_analysis.py create mode 100644 apis/python/remote_tests/test_03_versions.py create mode 100644 apis/python/remote_tests/test_04_todo.py create mode 100644 apis/python/remote_tests/util.py diff --git a/.github/workflows/libtiledb-ci.yml b/.github/workflows/libtiledb-ci.yml index 9bbfa562fc..87f7b118fe 100644 --- a/.github/workflows/libtiledb-ci.yml +++ b/.github/workflows/libtiledb-ci.yml @@ -1,15 +1,18 @@ name: libTileDB-SOMA CodeCov on: - pull_request: - paths-ignore: - - "apis/python/**" - - "apis/r/**" - - ".pre-commit-config.yaml" - push: - branches: - - main - - 'release-*' +# XXX TEMP +# pull_request: +# paths-ignore: +# - "apis/python/**" +# - "apis/r/**" +# - ".pre-commit-config.yaml" +# # XXX TEMP +# - ".github/workflows/python-remote-storage.yml" +# push: +# branches: +# - main +# - 'release-*' workflow_dispatch: jobs: diff --git a/.github/workflows/libtiledbsoma-asan-ci.yml b/.github/workflows/libtiledbsoma-asan-ci.yml index 9b08b6fbbe..45e1271024 100644 --- a/.github/workflows/libtiledbsoma-asan-ci.yml +++ b/.github/workflows/libtiledbsoma-asan-ci.yml @@ -1,15 +1,18 @@ name: libtiledbsoma ASAN on: - pull_request: - paths-ignore: - - "apis/python/**" - - "apis/r/**" - - ".pre-commit-config.yaml" - push: - branches: - - main - - 'release-*' +# XXX TEMP +# pull_request: +# paths-ignore: +# - "apis/python/**" +# - "apis/r/**" +# - ".pre-commit-config.yaml" +# # XXX TEMP +# - ".github/workflows/python-remote-storage.yml" +# push: +# branches: +# - main +# - 'release-*' workflow_dispatch: jobs: diff --git a/.github/workflows/python-ci-minimal.yml b/.github/workflows/python-ci-minimal.yml index 7168102607..81d214e777 100644 --- a/.github/workflows/python-ci-minimal.yml +++ b/.github/workflows/python-ci-minimal.yml @@ -8,18 +8,19 @@ name: TileDB-SOMA Python CI (Minimal) # To test the full matrix on a working branch, invoke ./python-ci-full.yml from # https://github.com/single-cell-data/TileDB-SOMA/actions/workflows/python-ci-full.yml on: - pull_request: - branches: - - main - - 'release-*' - paths: - - '**' - - '!**.md' - - '!apis/r/**' - - '!docs/**' - - '!.github/**' - - '.github/workflows/python-ci-minimal.yml' - - 
'.github/workflows/python-ci-single.yml' + # XXX TEMP +# pull_request: +# branches: +# - main +# - 'release-*' +# paths: +# - '**' +# - '!**.md' +# - '!apis/r/**' +# - '!docs/**' +# - '!.github/**' +# - '.github/workflows/python-ci-minimal.yml' +# - '.github/workflows/python-ci-single.yml' workflow_dispatch: jobs: diff --git a/.github/workflows/python-dependency-variation.yml b/.github/workflows/python-dependency-variation.yml index db6417013d..3f2c654d1f 100644 --- a/.github/workflows/python-dependency-variation.yml +++ b/.github/workflows/python-dependency-variation.yml @@ -1,15 +1,16 @@ name: TileDB-SOMA Python CI with varying dependencies on: - push: - branches: - - main - - 'release-*' - pull_request: - paths-ignore: - - '**.md' - - 'apis/r/**' - - 'docs/**' +# XXX TEMP +# push: +# branches: +# - main +# - 'release-*' +# pull_request: +# paths-ignore: +# - '**.md' +# - 'apis/r/**' +# - 'docs/**' workflow_dispatch: jobs: diff --git a/.github/workflows/python-remote-storage.yml b/.github/workflows/python-remote-storage.yml new file mode 100644 index 0000000000..8c5b8e8664 --- /dev/null +++ b/.github/workflows/python-remote-storage.yml @@ -0,0 +1,104 @@ +name: TileDB-SOMA Python CI (remote storage) + +on: + workflow_dispatch: + # + # Not for regular use: + # TEMP + pull_request: + # + # TODO: a nightly cron + +env: + # Don't name this "TILEDB_REST_TOKEN" since that will map into a core + # env/config override, as if config key "rest.token" had been set. One of the + # purposes of this CI is to run tests where all config is passed via context + # arguments and none via environment variables, in order to flush out + # callsites within the code which aren't passing context as they should. + TILEDB_REST_UNITTEST_TOKEN: ${{ secrets.TILEDB_REST_UNITTEST_TOKEN}} + +jobs: + ci: + strategy: + fail-fast: false + matrix: + include: + # This could be run on MacOS too, but, we have enough OS redundancy, + # and MacOS-runner availability is a more tightly constrained resource + # in GitHub Actions as of 2025-02-06. + - name: linux + os: ubuntu-24.04 + # TODO: also on 3.12. But 3.9 is higher-pri, until we drop support + # for it. (Note our main CI tests across a broader set of Python + # versions.) + python_version: 3.9 + cc: gcc-13 + cxx: g++-13 + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + + - name: Show matrix OS + run: echo "matrix.os:" ${{ matrix.os }} + + - name: Linux CPU info + if: ${{ matrix.os == 'ubuntu-24.04' }} + run: cat /proc/cpuinfo + + - name: MacOS CPU info + if: ${{ matrix.os == 'macOS-latest' }} + run: sysctl -a | grep cpu + + - name: Select XCode version + if: startsWith(matrix.os, 'macos') + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '15.4' + + - name: Set up Python ${{ matrix.python_version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + cache: pip + cache-dependency-path: ./apis/python/setup.py + + - name: Show XCode version + run: clang --version + + - name: Check out TileDB-SOMA + uses: actions/checkout@v4 + with: + fetch-depth: 0 # ensure we get all tags to inform package version determination + + - name: Log pip dependencies + run: pip list + + - name: Install tiledbsoma + # If you'reiterating on the testing framework itself, install from PyPI to avoid the + # time-consuming build. If you're trying to evaluate the code modified by a PR, + # install from source. 
+ # + # From PyPI: + # run: pip install tiledbsoma==1.15.7 + # + # From source: + run: pip -v install -e apis/python[all] -C "--build-option=--no-tiledb-deprecated" + # + env: + CC: ${{ matrix.cc }} + CXX: ${{ matrix.cxx }} + + - name: Install dependencies + run: pip install --prefer-binary pytest typeguard tiledb.cloud + + - name: Show package versions + run: python scripts/show-versions.py + + - name: Show tiledb.cloud version + run: python -c 'import tiledb.cloud; print(tiledb.cloud.version.version)' + + - name: Run pytests for Python + shell: bash + run: python -m pytest apis/python/remote_tests -v --durations=20 --maxfail=50 diff --git a/.github/workflows/r-python-interop-testing.yml b/.github/workflows/r-python-interop-testing.yml index 2687f8af7b..b4c2f2110b 100644 --- a/.github/workflows/r-python-interop-testing.yml +++ b/.github/workflows/r-python-interop-testing.yml @@ -1,17 +1,21 @@ name: TileDB-SOMA R-Python interop testing on: - pull_request: - # TODO: leave this enabled for pre-merge signal for now. At some point we may want to go back to - # only having this signal post-merge. - #paths: - # - "apis/python/**" - # - "apis/r/**" - # - "apis/system/**" - push: - branches: - - main - - "release-*" +# XXX TEMP +# pull_request: +# paths-ignore: +# # XXX TEMP +# - ".github/workflows/python-remote-storage.yml" +# # TODO: leave this enabled for pre-merge signal for now. At some point we may want to go back to +# # only having this signal post-merge. +# #paths: +# # - "apis/python/**" +# # - "apis/r/**" +# # - "apis/system/**" +# push: +# branches: +# - main +# - "release-*" workflow_dispatch: jobs: diff --git a/apis/python/remote_tests/README.md b/apis/python/remote_tests/README.md new file mode 100644 index 0000000000..1870871e26 --- /dev/null +++ b/apis/python/remote_tests/README.md @@ -0,0 +1,45 @@ +# How to run these tests + +``` +export TILEDB_REST_TOKEN="..." # Get the token for the Saas `unittest` user +unsetTILEDB_REST_PAYER_NAMESPACE # If you have that set +``` + +As of 2025-02-07, use Python 3.9 to run UDF tests; otherwise they will be skipped. + +``` +python -m pytest path/to/this/directory +``` + +# Test-data setup + +This is what was done for initial setup of these tests, and what should be done for future releases. + +``` +export TILEDB_REST_TOKEN="..." # Get the token for the Saas `unittest` user +export TILEDB_REST_PAYER_NAMESPACE=unittest +``` + +Here are source data you can find in the sandbox account `unittest` space: + +``` +s3://tiledb-unittest/soma-prod-test-data/h5ad/pbmc3k_unprocessed.h5ad +s3://tiledb-unittest/soma-prod-test-data/h5ad/pbmc3k_processed.h5ad +``` + +Local copy: + +``` +aws s3 cp s3://tiledb-unittest/soma-prod-test-data/h5ad . +``` + +Then use `tiledbsoma.io.from_h5ad` with the following sources and data: + +* Preferr a bare Docker image +* Repeat for all desired TileDB-SOMA versions: + * `pip install tiledbsoma==1.15.7` (or whichever version) + * Ingest to `s3://tiledb-unittest/soma-prod-test-data/1.15.7/pbmc3k_unprocessed_1.15.7` + * Register this in the cloud UI + * Note: as of 2025-02-07 the cloud UI disallows `.` in group names so register with name like `1_15_7`. 
+ * Tracked at [sc-63068](https://app.shortcut.com/tiledb-inc/story/63068/allow-in-registration-paths) + * Do not ingest directly to `tiledb://unittest/s3://tiledb-unittest/soma-prod-test-data/1.15.7/pbmc3k_unprocessed_1.15.7` since this will use today's version of core server-side, and what we want to really test is data written entirely by the pip-installed versions of tiledbsoma and core. diff --git a/apis/python/remote_tests/__init__.py b/apis/python/remote_tests/__init__.py new file mode 100644 index 0000000000..eeedbf74bc --- /dev/null +++ b/apis/python/remote_tests/__init__.py @@ -0,0 +1,3 @@ +from typeguard import install_import_hook + +install_import_hook("tiledbsoma") diff --git a/apis/python/remote_tests/conftest.py b/apis/python/remote_tests/conftest.py new file mode 100644 index 0000000000..2e9fd9a5d4 --- /dev/null +++ b/apis/python/remote_tests/conftest.py @@ -0,0 +1,47 @@ +import os + +import pytest + +import tiledbsoma +import tiledb.cloud + + +@pytest.fixture +def conftest_token(): + env_name = "TILEDB_REST_UNITTEST_TOKEN" + token = os.getenv(env_name) + if token is None: + raise Exception(f'Environment variable "{env_name}" is not set') + return token + + +@pytest.fixture +def conftest_tiledb_cloud_login(conftest_token): + print("conftest_tiledb_cloud_login") + tiledb.cloud.login(token=conftest_token) + return None + + +@pytest.fixture +def conftest_user_profile(conftest_tiledb_cloud_login): + return tiledb.cloud.user_profile() + + +@pytest.fixture +def conftest_namespace(conftest_user_profile): + return conftest_user_profile.username + + +@pytest.fixture +def conftest_default_s3_path(conftest_user_profile): + return conftest_user_profile.default_s3_path + + +@pytest.fixture +def conftest_context(conftest_token, conftest_namespace): + return tiledbsoma.SOMATileDBContext( + tiledb_config={ + "rest.token": conftest_token, + "rest.payer_namespace": conftest_namespace, + } + ) diff --git a/apis/python/remote_tests/test_01_setup.py b/apis/python/remote_tests/test_01_setup.py new file mode 100644 index 0000000000..1d3840ad88 --- /dev/null +++ b/apis/python/remote_tests/test_01_setup.py @@ -0,0 +1,82 @@ +# These are test that need to run first to check basic functionality, before we go on to test other, +# more complex things. +from __future__ import annotations + +import os +import sys + +import pytest +import scanpy + +import tiledbsoma +import tiledbsoma.io +import tiledb.cloud + +from .util import util_make_uri, util_tear_down_uri + +# Nominally this is the 'unittest' SaaS user. What we require is: +# +# * The user can _read_ data in the 'unittest' namespace. +# * For data _written_, the namespace and default_s3_path are taken from the +# cloud profile. +# +# For CI, this environment variable is a GitHub Actions secret, propagated in +# the CI YAML. 
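+#
+# The conftest_context fixture in conftest.py turns that token into a
+# SOMATileDBContext, roughly along these lines (names as in conftest.py):
+#
+#   tiledbsoma.SOMATileDBContext(
+#       tiledb_config={
+#           "rest.token": os.environ["TILEDB_REST_UNITTEST_TOKEN"],
+#           "rest.payer_namespace": conftest_namespace,
+#       }
+#   )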
+if os.getenv("TILEDB_REST_UNITTEST_TOKEN") is None: + pytest.skip( + reason="$TILEDB_REST_UNITTEST_TOKEN is not set", allow_module_level=True + ) + + +def test_skipping_correctly(): + assert os.getenv("TILEDB_REST_UNITTEST_TOKEN") is not None + + +def test_basic_read(conftest_context): + uri = "tiledb://unittest/pbmc3k_unprocessed_1_15_7" + assert tiledbsoma.Experiment.exists(uri, context=conftest_context) + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + assert exp.obs.count == 2700 + assert "RNA" in exp.ms + assert exp.ms["RNA"].var.count == 13714 + + +def test_basic_write(conftest_context, conftest_namespace, conftest_default_s3_path): + (creation_uri, readback_uri) = util_make_uri( + "soma-prod-ephemeral-data", + "ephemeral_basic_write", + conftest_namespace, + conftest_default_s3_path, + ) + + adata = scanpy.datasets.pbmc3k() + + tiledbsoma.io.from_anndata( + creation_uri, + adata, + measurement_name="RNA", + context=conftest_context, + ) + + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: + assert exp.obs.count == 2700 + assert "RNA" in exp.ms + assert exp.ms["RNA"].var.count == 32738 + + util_tear_down_uri(readback_uri) + + +@pytest.mark.skipif( + (sys.version_info.major, sys.version_info.minor) != (3, 9), + reason="As of 2025-02-05 UDFs require Python 3.9", +) +def test_remote_version(conftest_tiledb_cloud_login): + + def remote_version(): + import tiledbsoma + + return {"tiledbsoma": tiledbsoma.__version__} + + output = tiledb.cloud.udf.exec(remote_version) + assert "tiledbsoma" in output + assert output["tiledbsoma"].startswith("1.") diff --git a/apis/python/remote_tests/test_02_analysis.py b/apis/python/remote_tests/test_02_analysis.py new file mode 100644 index 0000000000..c7f56e7b6c --- /dev/null +++ b/apis/python/remote_tests/test_02_analysis.py @@ -0,0 +1,121 @@ +# These are test that need to run first to check basic functionality, before we go on to test other, +# more complex things. 
+from __future__ import annotations + +import os + +import pandas as pd +import pytest +import scanpy as sc + +import tiledbsoma +import tiledbsoma.io +import tiledbsoma.logging + +from .util import util_make_uri, util_tear_down_uri + +if os.getenv("TILEDB_REST_UNITTEST_TOKEN") is None: + pytest.skip( + reason="$TILEDB_REST_UNITTEST_TOKEN is not set", allow_module_level=True + ) + + +def test_write_with_updates( + conftest_context, conftest_namespace, conftest_default_s3_path +): + (creation_uri, readback_uri) = util_make_uri( + "soma-prod-ephemeral-data", + "ephemeral_analysis", + conftest_namespace, + conftest_default_s3_path, + ) + + adata = sc.datasets.pbmc3k() + + tiledbsoma.logging.info() + tiledbsoma.io.from_anndata( + creation_uri, + adata, + measurement_name="RNA", + context=conftest_context, + ) + + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: + assert "RNA" in exp.ms + + assert exp.metadata.get("dataset_type") == "soma" + assert exp.metadata.get("soma_object_type") == "SOMAExperiment" + assert exp.obs.metadata.get("soma_object_type") == "SOMADataFrame" + assert exp.ms["RNA"].var.metadata.get("soma_object_type") == "SOMADataFrame" + assert "data" in exp.ms["RNA"].X + assert ( + exp.ms["RNA"].X["data"].metadata.get("soma_object_type") + == "SOMASparseNDArray" + ) + + assert exp.obs.count == adata.obs.shape[0] + assert exp.ms["RNA"].var.count == adata.var.shape[0] + + obs_arrow = exp.obs.read().concat() + obs_pandas = obs_arrow.to_pandas() + assert obs_pandas.shape[0] == adata.obs.shape[0] + + # Here we augment that with some on-the-fly computed data. This imitates a common customer workflow. + # Add a categorical column + parity = [["even", "odd"][e % 2] for e in range(len(adata.obs))] + adata.obs["parity"] = pd.Categorical(parity) + with tiledbsoma.Experiment.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.update_obs(exp, adata.obs, context=conftest_context) + + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: + obs_arrow = exp.obs.read().concat() + obs_pandas = obs_arrow.to_pandas() + assert obs_pandas.shape[0] == adata.obs.shape[0] + + sc.pp.normalize_total(adata, inplace=True) + sc.pp.log1p(adata, copy=False) + + with tiledbsoma.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.add_X_layer( + exp, + measurement_name="RNA", + X_layer_name="logcounts", + X_layer_data=adata.X, + context=conftest_context, + ) + + with tiledbsoma.open(readback_uri, "w", context=conftest_context) as exp: + assert sorted(list(exp.ms["RNA"].X.keys())) == ["data", "logcounts"] + + # Add dimensional-reduction results + sc.pp.highly_variable_genes(adata, inplace=True) + adata = adata[:, adata.var.highly_variable] + sc.pp.scale(adata) + sc.tl.pca(adata, use_highly_variable=True, n_comps=5) + + with tiledbsoma.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.add_matrix_to_collection( + exp=exp, + measurement_name="RNA", + collection_name="obsm", + matrix_name="logcounts_pca", + matrix_data=adata.obsm["X_pca"], + context=conftest_context, + ) + + with tiledbsoma.open(readback_uri, "w", context=conftest_context) as exp: + assert sorted(list(exp.ms["RNA"].obsm.keys())) == ["logcounts_pca"] + + with tiledbsoma.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.add_matrix_to_collection( + exp=exp, + measurement_name="RNA", + collection_name="varm", + matrix_name="logcounts_pcs", + matrix_data=adata.varm["PCs"], + context=conftest_context, + ) + with 
tiledbsoma.open(exp.uri, context=conftest_context) as exp: + assert sorted(list(exp.ms["RNA"].varm.keys())) == ["logcounts_pcs"] + + util_tear_down_uri(readback_uri) diff --git a/apis/python/remote_tests/test_03_versions.py b/apis/python/remote_tests/test_03_versions.py new file mode 100644 index 0000000000..175fc44d8d --- /dev/null +++ b/apis/python/remote_tests/test_03_versions.py @@ -0,0 +1,119 @@ +# These are test that need to run first to check basic functionality, before we go on to test other, +# more complex things. +from __future__ import annotations + +import os + +import pytest + +import tiledbsoma +import tiledbsoma.io + +from .util import util_pbmc3k_unprocessed_versions + +if os.getenv("TILEDB_REST_UNITTEST_TOKEN") is None: + pytest.skip( + reason="$TILEDB_REST_UNITTEST_TOKEN is not set", allow_module_level=True + ) + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_basic_readback(conftest_context, uri_and_info): + uri, info = uri_and_info + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + + md = dict(exp.metadata) + assert md["dataset_type"] == "soma" + assert md["soma_object_type"] == "SOMAExperiment" + + md = dict(exp.obs.metadata) + assert md["soma_object_type"] == "SOMADataFrame" + + md = dict(exp.ms["RNA"].var.metadata) + assert md["soma_object_type"] == "SOMADataFrame" + + md = dict(exp.ms["RNA"].X["data"].metadata) + assert md["soma_object_type"] == "SOMASparseNDArray" + + obs_table = exp.obs.read().concat() + assert len(obs_table) == 2700 + obs_df = obs_table.to_pandas() + assert obs_df.shape == (2700, 6) + + var_table = exp.ms["RNA"].var.read().concat() + assert len(var_table) == 13714 + var_df = var_table.to_pandas() + assert var_df.shape == (13714, 2) + + X_coo = exp.ms["RNA"].X["data"].read().coos().concat() + if info["shape"] == "old": + assert X_coo.shape == (2147483646, 2147483646) + else: + assert X_coo.shape == (2700, 13714) + + # Implicitly checking for no throw + adata = tiledbsoma.io.to_anndata(exp, "RNA") + + assert adata.obs.shape == (2700, 4) + assert adata.var.shape == (13714, 0) + assert adata.X.shape == (2700, 13714) + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_dataframe_queries(conftest_context, uri_and_info): + uri, info = uri_and_info + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + + qobs = ( + exp.obs.read( + coords=[slice(0, 99)], + value_filter="nFeature_RNA > 1000", + column_names=["soma_joinid", "obs_id", "nFeature_RNA"], + ) + .concat() + .to_pandas() + ) + assert qobs.shape == (22, 3) + + qvar = ( + exp.ms["RNA"] + .var.read( + value_filter="var_id in ['ANXA1', 'IFI44', 'IFI44L', 'OAS1']", + ) + .concat() + .to_pandas() + ) + assert qvar.shape == (4, 2) + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_experiment_queries(conftest_context, uri_and_info): + uri, info = uri_and_info + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + + query = tiledbsoma.ExperimentAxisQuery( + experiment=exp, + measurement_name="RNA", + obs_query=tiledbsoma.AxisQuery( + value_filter="nFeature_RNA > 1000", + ), + var_query=tiledbsoma.AxisQuery( + value_filter="var_id in ['ANXA1', 'IFI44', 'IFI44L', 'OAS1']", + ), + ) + + assert (query.n_obs, query.n_vars) == (530, 4) + + +# tiledbsoma.io.show_experiment_shapes +# tiledbsoma.io.upgrade_experiment_shapes +# tiledbsoma.io.resize_experiment diff --git 
a/apis/python/remote_tests/test_04_todo.py b/apis/python/remote_tests/test_04_todo.py new file mode 100644 index 0000000000..e3ef75b122 --- /dev/null +++ b/apis/python/remote_tests/test_04_todo.py @@ -0,0 +1,64 @@ +# +#``` +## ================================================================ +### UDFs +#def remote_obs_schema(exp_uri): +# import tiledbsoma +# exp = tiledbsoma.Experiment.open(exp_uri) +# return exp.obs.schema +#import tiledb.cloud +#import tiledb.cloud.udf +#tiledb.cloud.udf.exec( +# remote_obs_schema, +# soma_pbmc3k_uri, +#) +# +#def remote_query(exp_uri): +# import tiledbsoma +# exp = tiledbsoma.Experiment.open(exp_uri) +# +# query = tiledbsoma.ExperimentAxisQuery( +# experiment=exp, +# measurement_name="RNA", +# obs_query=tiledbsoma.AxisQuery( +# value_filter="n_genes_by_counts > 1000", +# ), +# var_query=tiledbsoma.AxisQuery( +# value_filter="n_cells_by_counts > 100", +# ), +# ) +# +# return (query.n_obs, query.n_vars) +#tiledb.cloud.udf.exec( +# remote_query, soma_pbmc3k_uri, +#) +# +## ================================================================ +## Collection-mapper test +#from tiledb.cloud.taskgraphs import client_executor as executor +#soco_uri = 'tiledb://TileDB-Inc/stack-small-soco-staging' +#res = tiledb.cloud.udf.exec( +# 'TileDB-Inc/soma_experiment_collection_mapper', +# soco_uri=soco_uri, +# measurement_name="RNA", +# X_layer_name="data", +# # callback = lambda x: x.obs.shape, +# # callback = lambda x: x, +# callback = lambda adata: [adata.obs.shape, adata.var.shape, adata.X.shape], +# # callback = lambda adata: adata.var, +# args_dict={}, +# reducer = lambda x: x, +# obs_attrs = ['obs_id', 'cell_type', 'is_primary_data'], +# var_attrs = ['var_id', 'means'], +#) +#dag = executor.LocalExecutor(res, namespace = "TileDB-Inc") +#dag.visualize() +##%%time +#dag.execute() +#dag.wait() +#dag.node("output").result() +#``` + +#* Make small stack; provenance +#* Append mode: Monday/Tuesday is fine +#* Show, upgrade, resize diff --git a/apis/python/remote_tests/util.py b/apis/python/remote_tests/util.py new file mode 100644 index 0000000000..975fc32534 --- /dev/null +++ b/apis/python/remote_tests/util.py @@ -0,0 +1,62 @@ +import datetime +import os +import pathlib +import shutil +from typing import Tuple + +import tiledb.cloud + +# For cloud: +# * Create with timestamp +# * Delete on teardown +# For local: +# * Create without timestamp +# o Only remove the URI from a _previous_ run (if any) +# * Do not delete on teardown -- so developers can look at the data + + +def util_make_uri( + dirname: str, + basename: str, + namespace: str, + default_s3_path: str, +) -> Tuple[str, str]: + if os.getenv("TILEDB_SOMA_CLOUD_TEST_LOCAL_PATHS") is None: + + # The default_s3_path contains the "s3://..." prefix and a trailing slash. + # Note that double slashes can cause group-creation failures so we need + # to carefully strip them out. + bucket = (default_s3_path).rstrip("/") + stamp = datetime.datetime.today().strftime("%Y%m%d-%H%M%S") + creation_uri = f"tiledb://{namespace}/{bucket}/{dirname}/{basename}_{stamp}" + readback_uri = f"tiledb://{namespace}/{basename}_{stamp}" + return (creation_uri, readback_uri) + + else: + uri = f"/tmp/tiledbsoma-cloud-test/{dirname}/{basename}" + if os.path.exists(uri): + shutil.rmtree(uri) + pathlib.Path(os.path.dirname(uri)).mkdir(parents=True, exist_ok=True) + print() + print("USING LOCAL URI", uri) + print() + return (uri, uri) + + +def util_tear_down_uri(uri): + # This assumes tiledb.cloud.login has already been called at util_make_uri. 
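+    # For cloud runs this deletes the ephemeral group created by util_make_uri
+    # -- a readback URI such as "tiledb://unittest/ephemeral_basic_write_20250207-120000"
+    # (hypothetical timestamp) -- using recursive=True so the group's registered
+    # members are removed along with the group itself.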
+ if uri.startswith("tiledb://"): + tiledb.cloud.groups.delete(uri=uri, recursive=True) + # Delete local URIs only on _next_ run, so devs can inspect + + +def util_pbmc3k_unprocessed_versions(): + # New shape as in https://github.com/single-cell-data/TileDB-SOMA/issues/2407 + # which was released with tiledbsoma 1.15.0. + return [ + ["tiledb://unittest/pbmc3k_unprocessed_1_7_3", {"shape": "old"}], + ["tiledb://unittest/pbmc3k_unprocessed_1_12_3", {"shape": "old"}], + ["tiledb://unittest/pbmc3k_unprocessed_1_14_5", {"shape": "old"}], + ["tiledb://unittest/pbmc3k_unprocessed_1_15_0", {"shape": "new"}], + ["tiledb://unittest/pbmc3k_unprocessed_1_15_7", {"shape": "new"}], + ] diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index eede5e1fbc..1c246b9d5e 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1865,6 +1865,7 @@ def add_X_layer( X_layer_data: Union[Matrix, h5py.Dataset], ingest_mode: IngestMode = "write", use_relative_uri: bool | None = None, + context: SOMATileDBContext | None = None, ) -> None: """This is useful for adding X data, for example from `Scanpy `_'s ``scanpy.pp.normalize_total``, @@ -1884,6 +1885,7 @@ def add_X_layer( matrix_data=X_layer_data, ingest_mode=ingest_mode, use_relative_uri=use_relative_uri, + context=context, ) From 00755035a7fb4bfcad34db902921d16991284f39 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 11:27:39 -0500 Subject: [PATCH 2/8] neaten [skip ci] --- apis/python/remote_tests/test_01_setup.py | 5 ++- apis/python/remote_tests/test_02_analysis.py | 2 - apis/python/remote_tests/test_03_versions.py | 7 --- apis/python/remote_tests/test_04_todo.py | 47 +++++++++----------- 4 files changed, 25 insertions(+), 36 deletions(-) diff --git a/apis/python/remote_tests/test_01_setup.py b/apis/python/remote_tests/test_01_setup.py index 1d3840ad88..88d84c0948 100644 --- a/apis/python/remote_tests/test_01_setup.py +++ b/apis/python/remote_tests/test_01_setup.py @@ -1,5 +1,6 @@ -# These are test that need to run first to check basic functionality, before we go on to test other, -# more complex things. +# These are test that need to run first to check basic functionality, before we +# go on to test other, more complex things. + from __future__ import annotations import os diff --git a/apis/python/remote_tests/test_02_analysis.py b/apis/python/remote_tests/test_02_analysis.py index c7f56e7b6c..20da5a8c6f 100644 --- a/apis/python/remote_tests/test_02_analysis.py +++ b/apis/python/remote_tests/test_02_analysis.py @@ -1,5 +1,3 @@ -# These are test that need to run first to check basic functionality, before we go on to test other, -# more complex things. from __future__ import annotations import os diff --git a/apis/python/remote_tests/test_03_versions.py b/apis/python/remote_tests/test_03_versions.py index 175fc44d8d..ae95e1766d 100644 --- a/apis/python/remote_tests/test_03_versions.py +++ b/apis/python/remote_tests/test_03_versions.py @@ -1,5 +1,3 @@ -# These are test that need to run first to check basic functionality, before we go on to test other, -# more complex things. 
from __future__ import annotations import os @@ -112,8 +110,3 @@ def test_experiment_queries(conftest_context, uri_and_info): ) assert (query.n_obs, query.n_vars) == (530, 4) - - -# tiledbsoma.io.show_experiment_shapes -# tiledbsoma.io.upgrade_experiment_shapes -# tiledbsoma.io.resize_experiment diff --git a/apis/python/remote_tests/test_04_todo.py b/apis/python/remote_tests/test_04_todo.py index e3ef75b122..f0c1df4e7f 100644 --- a/apis/python/remote_tests/test_04_todo.py +++ b/apis/python/remote_tests/test_04_todo.py @@ -1,22 +1,20 @@ -# -#``` ## ================================================================ ### UDFs -#def remote_obs_schema(exp_uri): +# def remote_obs_schema(exp_uri): # import tiledbsoma # exp = tiledbsoma.Experiment.open(exp_uri) # return exp.obs.schema -#import tiledb.cloud -#import tiledb.cloud.udf -#tiledb.cloud.udf.exec( +# import tiledb.cloud +# import tiledb.cloud.udf +# tiledb.cloud.udf.exec( # remote_obs_schema, # soma_pbmc3k_uri, -#) +# ) # -#def remote_query(exp_uri): +# def remote_query(exp_uri): # import tiledbsoma # exp = tiledbsoma.Experiment.open(exp_uri) -# +# # query = tiledbsoma.ExperimentAxisQuery( # experiment=exp, # measurement_name="RNA", @@ -27,17 +25,17 @@ # value_filter="n_cells_by_counts > 100", # ), # ) -# +# # return (query.n_obs, query.n_vars) -#tiledb.cloud.udf.exec( +# tiledb.cloud.udf.exec( # remote_query, soma_pbmc3k_uri, -#) +# ) # ## ================================================================ ## Collection-mapper test -#from tiledb.cloud.taskgraphs import client_executor as executor -#soco_uri = 'tiledb://TileDB-Inc/stack-small-soco-staging' -#res = tiledb.cloud.udf.exec( +# from tiledb.cloud.taskgraphs import client_executor as executor +# soco_uri = 'tiledb://TileDB-Inc/stack-small-soco-staging' +# res = tiledb.cloud.udf.exec( # 'TileDB-Inc/soma_experiment_collection_mapper', # soco_uri=soco_uri, # measurement_name="RNA", @@ -50,15 +48,14 @@ # reducer = lambda x: x, # obs_attrs = ['obs_id', 'cell_type', 'is_primary_data'], # var_attrs = ['var_id', 'means'], -#) -#dag = executor.LocalExecutor(res, namespace = "TileDB-Inc") -#dag.visualize() +# ) +# dag = executor.LocalExecutor(res, namespace = "TileDB-Inc") +# dag.visualize() ##%%time -#dag.execute() -#dag.wait() -#dag.node("output").result() -#``` +# dag.execute() +# dag.wait() +# dag.node("output").result() -#* Make small stack; provenance -#* Append mode: Monday/Tuesday is fine -#* Show, upgrade, resize +# * Make small stack; provenance +# * Append mode: Monday/Tuesday is fine +# * Show, upgrade, resize From 896cfc63229e0e469fcf5e8fb54e660e079a6f6e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 12:16:28 -0500 Subject: [PATCH 3/8] append --- apis/python/remote_tests/test_04_append.py | 67 +++++++++++++++++++ .../{test_04_todo.py => test_99_todo.py} | 0 apis/python/remote_tests/util.py | 1 + 3 files changed, 68 insertions(+) create mode 100644 apis/python/remote_tests/test_04_append.py rename apis/python/remote_tests/{test_04_todo.py => test_99_todo.py} (100%) diff --git a/apis/python/remote_tests/test_04_append.py b/apis/python/remote_tests/test_04_append.py new file mode 100644 index 0000000000..f6eec2f81d --- /dev/null +++ b/apis/python/remote_tests/test_04_append.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import os + +import pytest + +import tiledbsoma +import tiledbsoma.io +import scanpy as sc + +from .util import util_make_uri + +if os.getenv("TILEDB_REST_UNITTEST_TOKEN") is None: + pytest.skip( + reason="$TILEDB_REST_UNITTEST_TOKEN is not 
set", allow_module_level=True + ) + + +def test_basic_append(conftest_context, conftest_namespace, conftest_default_s3_path): + (creation_uri, readback_uri) = util_make_uri( + "soma-prod-ephemeral-data", + "ephemeral_basic_append", + conftest_namespace, + conftest_default_s3_path, + ) + + measurement_name = "RNA" + + adata1 = sc.datasets.pbmc3k() + adata1 .obs["when"] = ["Monday"] * len(adata1 .obs) + tiledbsoma.io.from_anndata(creation_uri, adata1 , measurement_name=measurement_name) + + with tiledbsoma.Experiment.open(readback_uri) as exp: + assert exp.obs.count == 2700 + assert exp.ms["RNA"].var.count == 32738 + assert exp.ms["RNA"].X["data"].shape == (2700, 32738) + + adata2 = sc.datasets.pbmc3k() + adata2.obs.index = [e.replace("-1", "-2") for e in adata1 .obs.index] + adata2.obs["when"] = ["Tuesday"] * len(adata2.obs) + adata2.X *= 10 + + rd = tiledbsoma.io.register_anndatas( + readback_uri, + [adata2], + measurement_name=measurement_name, + obs_field_name="obs_id", + var_field_name="var_id", + ) + + tiledbsoma.io.resize_experiment( + creation_uri, + nobs=rd.get_obs_shape(), + nvars=rd.get_var_shapes(), + ) + + tiledbsoma.io.from_anndata( + creation_uri, + adata2, + measurement_name=measurement_name, + registration_mapping=rd, + ) + + with tiledbsoma.Experiment.open(readback_uri) as exp: + assert exp.obs.count == 5400 + assert exp.ms["RNA"].var.count == 32738 + assert exp.ms["RNA"].X["data"].shape == (5400, 32738) diff --git a/apis/python/remote_tests/test_04_todo.py b/apis/python/remote_tests/test_99_todo.py similarity index 100% rename from apis/python/remote_tests/test_04_todo.py rename to apis/python/remote_tests/test_99_todo.py diff --git a/apis/python/remote_tests/util.py b/apis/python/remote_tests/util.py index 975fc32534..6ff5dd1f2c 100644 --- a/apis/python/remote_tests/util.py +++ b/apis/python/remote_tests/util.py @@ -37,6 +37,7 @@ def util_make_uri( if os.path.exists(uri): shutil.rmtree(uri) pathlib.Path(os.path.dirname(uri)).mkdir(parents=True, exist_ok=True) + # Please leave this comment in place. 
print() print("USING LOCAL URI", uri) print() From 565a51fa765961c5cb788849a8669161f578fccc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 12:32:09 -0500 Subject: [PATCH 4/8] more [skip ci] --- apis/python/remote_tests/test_03_versions.py | 23 ++++++++++++++++++++ apis/python/remote_tests/test_04_append.py | 8 +++---- apis/python/remote_tests/test_99_todo.py | 2 -- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/apis/python/remote_tests/test_03_versions.py b/apis/python/remote_tests/test_03_versions.py index ae95e1766d..4e06347c66 100644 --- a/apis/python/remote_tests/test_03_versions.py +++ b/apis/python/remote_tests/test_03_versions.py @@ -110,3 +110,26 @@ def test_experiment_queries(conftest_context, uri_and_info): ) assert (query.n_obs, query.n_vars) == (530, 4) + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_resize_information(conftest_context, uri_and_info): + uri, info = uri_and_info + print() + print("URI") + print(uri) + + upgradeable = tiledbsoma.io.upgrade_experiment_shapes( + uri, check_only=True, context=conftest_context + ) + if info["shape"] == "old": + assert upgradeable + else: + assert not upgradeable + + # tiledbsoma.io.show_experiment_shapes + # tiledbsoma.io.upgrade_experiment_shapes + # tiledbsoma.io.resize_experiment diff --git a/apis/python/remote_tests/test_04_append.py b/apis/python/remote_tests/test_04_append.py index f6eec2f81d..6c6432aaa9 100644 --- a/apis/python/remote_tests/test_04_append.py +++ b/apis/python/remote_tests/test_04_append.py @@ -3,10 +3,10 @@ import os import pytest +import scanpy as sc import tiledbsoma import tiledbsoma.io -import scanpy as sc from .util import util_make_uri @@ -27,8 +27,8 @@ def test_basic_append(conftest_context, conftest_namespace, conftest_default_s3_ measurement_name = "RNA" adata1 = sc.datasets.pbmc3k() - adata1 .obs["when"] = ["Monday"] * len(adata1 .obs) - tiledbsoma.io.from_anndata(creation_uri, adata1 , measurement_name=measurement_name) + adata1.obs["when"] = ["Monday"] * len(adata1.obs) + tiledbsoma.io.from_anndata(creation_uri, adata1, measurement_name=measurement_name) with tiledbsoma.Experiment.open(readback_uri) as exp: assert exp.obs.count == 2700 @@ -36,7 +36,7 @@ def test_basic_append(conftest_context, conftest_namespace, conftest_default_s3_ assert exp.ms["RNA"].X["data"].shape == (2700, 32738) adata2 = sc.datasets.pbmc3k() - adata2.obs.index = [e.replace("-1", "-2") for e in adata1 .obs.index] + adata2.obs.index = [e.replace("-1", "-2") for e in adata1.obs.index] adata2.obs["when"] = ["Tuesday"] * len(adata2.obs) adata2.X *= 10 diff --git a/apis/python/remote_tests/test_99_todo.py b/apis/python/remote_tests/test_99_todo.py index f0c1df4e7f..17682f3803 100644 --- a/apis/python/remote_tests/test_99_todo.py +++ b/apis/python/remote_tests/test_99_todo.py @@ -56,6 +56,4 @@ # dag.wait() # dag.node("output").result() -# * Make small stack; provenance -# * Append mode: Monday/Tuesday is fine # * Show, upgrade, resize From ab0f9bb589536cd5321f674b6ace3c10a4f32657 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 18:13:06 +0000 Subject: [PATCH 5/8] append-mode tests --- apis/python/remote_tests/test_03_versions.py | 73 +++++++++++++++++--- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/apis/python/remote_tests/test_03_versions.py b/apis/python/remote_tests/test_03_versions.py index 4e06347c66..17064c3442 100644 --- a/apis/python/remote_tests/test_03_versions.py +++ 
b/apis/python/remote_tests/test_03_versions.py @@ -1,5 +1,6 @@ from __future__ import annotations +import io import os import pytest @@ -116,20 +117,76 @@ def test_experiment_queries(conftest_context, uri_and_info): "uri_and_info", util_pbmc3k_unprocessed_versions(), ) -def test_resize_information(conftest_context, uri_and_info): +def test_upgrade_experiment_shapes(conftest_context, uri_and_info): uri, info = uri_and_info - print() - print("URI") - print(uri) + handle = io.StringIO() upgradeable = tiledbsoma.io.upgrade_experiment_shapes( - uri, check_only=True, context=conftest_context + uri, check_only=True, context=conftest_context, output_handle=handle ) + handle.seek(0) + lines = handle.readlines() + handle.close() + body = "\n".join(lines) + + assert "Dry run" in body if info["shape"] == "old": assert upgradeable else: assert not upgradeable + assert "dataframe already has its domain set" in body + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_resize_experiment_too_small(conftest_context, uri_and_info): + uri, info = uri_and_info + + handle = io.StringIO() + ok = tiledbsoma.io.resize_experiment( + uri, + nobs=10, + nvars={"RNA": 20}, + check_only=True, + context=conftest_context, + output_handle=handle, + ) + + handle.seek(0) + lines = handle.readlines() + handle.close() + body = "\n".join(lines) + + assert "Dry run" in body + assert not ok - # tiledbsoma.io.show_experiment_shapes - # tiledbsoma.io.upgrade_experiment_shapes - # tiledbsoma.io.resize_experiment + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_resize_experiment_ok(conftest_context, uri_and_info): + uri, info = uri_and_info + + handle = io.StringIO() + ok = tiledbsoma.io.resize_experiment( + uri, + nobs=100_000, + nvars={"RNA": 200_000}, + check_only=True, + context=conftest_context, + output_handle=handle, + ) + + handle.seek(0) + lines = handle.readlines() + handle.close() + body = "\n".join(lines) + + if info["shape"] == "old": + assert not ok + assert "dataframe currently has no domain set" in body + else: + assert ok From 4030d74d1f02edd4434ae91ac76fcf7789b49bfb Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 18:37:17 +0000 Subject: [PATCH 6/8] daily-remote-tests-issue-template.md --- .../daily-remote-tests-issue-template.md | 8 +++ .../daily-test-build-issue-template.md | 2 +- .github/workflows/libtiledb-ci.yml | 21 +++---- .github/workflows/libtiledbsoma-asan-ci.yml | 22 ++++--- .github/workflows/python-ci-minimal.yml | 25 ++++---- .../workflows/python-dependency-variation.yml | 19 +++--- .github/workflows/python-remote-storage.yml | 34 ++++++++--- .../workflows/r-python-interop-testing.yml | 26 ++++---- apis/python/remote_tests/README.md | 7 ++- apis/python/remote_tests/test_04_append.py | 14 ++++- apis/python/remote_tests/test_99_todo.py | 59 ------------------- 11 files changed, 103 insertions(+), 134 deletions(-) create mode 100644 .github/workflows/daily-remote-tests-issue-template.md delete mode 100644 apis/python/remote_tests/test_99_todo.py diff --git a/.github/workflows/daily-remote-tests-issue-template.md b/.github/workflows/daily-remote-tests-issue-template.md new file mode 100644 index 0000000000..ca0acfea9a --- /dev/null +++ b/.github/workflows/daily-remote-tests-issue-template.md @@ -0,0 +1,8 @@ +--- +title: Daily GitHub Actions fail for remote tests on {{ date | date('ddd, MMMM Do YYYY') }} +assignees: nguyenv, johnkerl +labels: bug +--- + +See run for more details: 
+https://github.com/{{ env.GITHUB_REPOSITORY }}/actions/runs/{{ env.GITHUB_RUN_ID }} diff --git a/.github/workflows/daily-test-build-issue-template.md b/.github/workflows/daily-test-build-issue-template.md index b6c19770c4..efb48190c5 100644 --- a/.github/workflows/daily-test-build-issue-template.md +++ b/.github/workflows/daily-test-build-issue-template.md @@ -1,5 +1,5 @@ --- -title: Daily GitHub Actions Build Fail on {{ date | date('ddd, MMMM Do YYYY') }} +title: Daily GitHub Actions build fail on {{ date | date('ddd, MMMM Do YYYY') }} assignees: nguyenv, ryan-williams, johnkerl labels: bug --- diff --git a/.github/workflows/libtiledb-ci.yml b/.github/workflows/libtiledb-ci.yml index 87f7b118fe..9bbfa562fc 100644 --- a/.github/workflows/libtiledb-ci.yml +++ b/.github/workflows/libtiledb-ci.yml @@ -1,18 +1,15 @@ name: libTileDB-SOMA CodeCov on: -# XXX TEMP -# pull_request: -# paths-ignore: -# - "apis/python/**" -# - "apis/r/**" -# - ".pre-commit-config.yaml" -# # XXX TEMP -# - ".github/workflows/python-remote-storage.yml" -# push: -# branches: -# - main -# - 'release-*' + pull_request: + paths-ignore: + - "apis/python/**" + - "apis/r/**" + - ".pre-commit-config.yaml" + push: + branches: + - main + - 'release-*' workflow_dispatch: jobs: diff --git a/.github/workflows/libtiledbsoma-asan-ci.yml b/.github/workflows/libtiledbsoma-asan-ci.yml index 45e1271024..631e080601 100644 --- a/.github/workflows/libtiledbsoma-asan-ci.yml +++ b/.github/workflows/libtiledbsoma-asan-ci.yml @@ -1,18 +1,16 @@ name: libtiledbsoma ASAN on: -# XXX TEMP -# pull_request: -# paths-ignore: -# - "apis/python/**" -# - "apis/r/**" -# - ".pre-commit-config.yaml" -# # XXX TEMP -# - ".github/workflows/python-remote-storage.yml" -# push: -# branches: -# - main -# - 'release-*' + pull_request: + paths-ignore: + - "apis/python/**" + - "apis/r/**" + - ".pre-commit-config.yaml" + - ".github/workflows/python-remote-storage.yml" + push: + branches: + - main + - 'release-*' workflow_dispatch: jobs: diff --git a/.github/workflows/python-ci-minimal.yml b/.github/workflows/python-ci-minimal.yml index 81d214e777..7168102607 100644 --- a/.github/workflows/python-ci-minimal.yml +++ b/.github/workflows/python-ci-minimal.yml @@ -8,19 +8,18 @@ name: TileDB-SOMA Python CI (Minimal) # To test the full matrix on a working branch, invoke ./python-ci-full.yml from # https://github.com/single-cell-data/TileDB-SOMA/actions/workflows/python-ci-full.yml on: - # XXX TEMP -# pull_request: -# branches: -# - main -# - 'release-*' -# paths: -# - '**' -# - '!**.md' -# - '!apis/r/**' -# - '!docs/**' -# - '!.github/**' -# - '.github/workflows/python-ci-minimal.yml' -# - '.github/workflows/python-ci-single.yml' + pull_request: + branches: + - main + - 'release-*' + paths: + - '**' + - '!**.md' + - '!apis/r/**' + - '!docs/**' + - '!.github/**' + - '.github/workflows/python-ci-minimal.yml' + - '.github/workflows/python-ci-single.yml' workflow_dispatch: jobs: diff --git a/.github/workflows/python-dependency-variation.yml b/.github/workflows/python-dependency-variation.yml index 3f2c654d1f..db6417013d 100644 --- a/.github/workflows/python-dependency-variation.yml +++ b/.github/workflows/python-dependency-variation.yml @@ -1,16 +1,15 @@ name: TileDB-SOMA Python CI with varying dependencies on: -# XXX TEMP -# push: -# branches: -# - main -# - 'release-*' -# pull_request: -# paths-ignore: -# - '**.md' -# - 'apis/r/**' -# - 'docs/**' + push: + branches: + - main + - 'release-*' + pull_request: + paths-ignore: + - '**.md' + - 'apis/r/**' + - 'docs/**' 
workflow_dispatch: jobs: diff --git a/.github/workflows/python-remote-storage.yml b/.github/workflows/python-remote-storage.yml index 8c5b8e8664..eec8ccd5f6 100644 --- a/.github/workflows/python-remote-storage.yml +++ b/.github/workflows/python-remote-storage.yml @@ -1,13 +1,18 @@ name: TileDB-SOMA Python CI (remote storage) on: - workflow_dispatch: + # Not for regular use, but you can uncomment this when putting up PRs on this + # file. Just remember to take it back out. There's no need to run + # remote-storage tests on every single PR push to our repo. + # + # pull_request: # - # Not for regular use: - # TEMP - pull_request: + # Allows for 'as needed' manual trigger: + workflow_dispatch: # - # TODO: a nightly cron + # Use a regular nighly build as well (time is UTC): + schedule: + - cron: "25 5 * * *" env: # Don't name this "TILEDB_REST_TOKEN" since that will map into a core @@ -28,8 +33,8 @@ jobs: # in GitHub Actions as of 2025-02-06. - name: linux os: ubuntu-24.04 - # TODO: also on 3.12. But 3.9 is higher-pri, until we drop support - # for it. (Note our main CI tests across a broader set of Python + # TO DO: also on 3.12. But 3.9 is higher-pri, until we drop support + # for it. (Note our main CI run tests across a broader set of Python # versions.) python_version: 3.9 cc: gcc-13 @@ -102,3 +107,18 @@ jobs: - name: Run pytests for Python shell: bash run: python -m pytest apis/python/remote_tests -v --durations=20 --maxfail=50 + + # File a bug report if anything fails, but don't file tickets for manual runs + # -- only for scheduled ones. + create_issue_on_fail: + needs: [ci] + if: (failure() || cancelled()) && github.event_name != 'workflow_dispatch' + steps: + - name: Checkout TileDB-SOMA `main` + uses: actions/checkout@v2 + - name: Create Issue if Build Fails + uses: JasonEtco/create-an-issue@v2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + filename: .github/workflows/daily-remote-tests-issue-template.md diff --git a/.github/workflows/r-python-interop-testing.yml b/.github/workflows/r-python-interop-testing.yml index b4c2f2110b..2687f8af7b 100644 --- a/.github/workflows/r-python-interop-testing.yml +++ b/.github/workflows/r-python-interop-testing.yml @@ -1,21 +1,17 @@ name: TileDB-SOMA R-Python interop testing on: -# XXX TEMP -# pull_request: -# paths-ignore: -# # XXX TEMP -# - ".github/workflows/python-remote-storage.yml" -# # TODO: leave this enabled for pre-merge signal for now. At some point we may want to go back to -# # only having this signal post-merge. -# #paths: -# # - "apis/python/**" -# # - "apis/r/**" -# # - "apis/system/**" -# push: -# branches: -# - main -# - "release-*" + pull_request: + # TODO: leave this enabled for pre-merge signal for now. At some point we may want to go back to + # only having this signal post-merge. + #paths: + # - "apis/python/**" + # - "apis/r/**" + # - "apis/system/**" + push: + branches: + - main + - "release-*" workflow_dispatch: jobs: diff --git a/apis/python/remote_tests/README.md b/apis/python/remote_tests/README.md index 1870871e26..9f76f8a4f6 100644 --- a/apis/python/remote_tests/README.md +++ b/apis/python/remote_tests/README.md @@ -1,8 +1,11 @@ # How to run these tests +These are nominally a nightly cron. However, you can run them manually if you want. + ``` -export TILEDB_REST_TOKEN="..." # Get the token for the Saas `unittest` user -unsetTILEDB_REST_PAYER_NAMESPACE # If you have that set +export TILEDB_REST_UNITTEST_TOKEN="..." 
# Get the token for the Saas `unittest` user +unset TILEDB_REST_TOKEN # If you have that set +unset TILEDB_REST_PAYER_NAMESPACE # If you have that set ``` As of 2025-02-07, use Python 3.9 to run UDF tests; otherwise they will be skipped. diff --git a/apis/python/remote_tests/test_04_append.py b/apis/python/remote_tests/test_04_append.py index 6c6432aaa9..bd49ddab45 100644 --- a/apis/python/remote_tests/test_04_append.py +++ b/apis/python/remote_tests/test_04_append.py @@ -28,9 +28,14 @@ def test_basic_append(conftest_context, conftest_namespace, conftest_default_s3_ adata1 = sc.datasets.pbmc3k() adata1.obs["when"] = ["Monday"] * len(adata1.obs) - tiledbsoma.io.from_anndata(creation_uri, adata1, measurement_name=measurement_name) + tiledbsoma.io.from_anndata( + creation_uri, + adata1, + measurement_name=measurement_name, + context=conftest_context, + ) - with tiledbsoma.Experiment.open(readback_uri) as exp: + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: assert exp.obs.count == 2700 assert exp.ms["RNA"].var.count == 32738 assert exp.ms["RNA"].X["data"].shape == (2700, 32738) @@ -46,12 +51,14 @@ def test_basic_append(conftest_context, conftest_namespace, conftest_default_s3_ measurement_name=measurement_name, obs_field_name="obs_id", var_field_name="var_id", + context=conftest_context, ) tiledbsoma.io.resize_experiment( creation_uri, nobs=rd.get_obs_shape(), nvars=rd.get_var_shapes(), + context=conftest_context, ) tiledbsoma.io.from_anndata( @@ -59,9 +66,10 @@ def test_basic_append(conftest_context, conftest_namespace, conftest_default_s3_ adata2, measurement_name=measurement_name, registration_mapping=rd, + context=conftest_context, ) - with tiledbsoma.Experiment.open(readback_uri) as exp: + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: assert exp.obs.count == 5400 assert exp.ms["RNA"].var.count == 32738 assert exp.ms["RNA"].X["data"].shape == (5400, 32738) diff --git a/apis/python/remote_tests/test_99_todo.py b/apis/python/remote_tests/test_99_todo.py deleted file mode 100644 index 17682f3803..0000000000 --- a/apis/python/remote_tests/test_99_todo.py +++ /dev/null @@ -1,59 +0,0 @@ -## ================================================================ -### UDFs -# def remote_obs_schema(exp_uri): -# import tiledbsoma -# exp = tiledbsoma.Experiment.open(exp_uri) -# return exp.obs.schema -# import tiledb.cloud -# import tiledb.cloud.udf -# tiledb.cloud.udf.exec( -# remote_obs_schema, -# soma_pbmc3k_uri, -# ) -# -# def remote_query(exp_uri): -# import tiledbsoma -# exp = tiledbsoma.Experiment.open(exp_uri) -# -# query = tiledbsoma.ExperimentAxisQuery( -# experiment=exp, -# measurement_name="RNA", -# obs_query=tiledbsoma.AxisQuery( -# value_filter="n_genes_by_counts > 1000", -# ), -# var_query=tiledbsoma.AxisQuery( -# value_filter="n_cells_by_counts > 100", -# ), -# ) -# -# return (query.n_obs, query.n_vars) -# tiledb.cloud.udf.exec( -# remote_query, soma_pbmc3k_uri, -# ) -# -## ================================================================ -## Collection-mapper test -# from tiledb.cloud.taskgraphs import client_executor as executor -# soco_uri = 'tiledb://TileDB-Inc/stack-small-soco-staging' -# res = tiledb.cloud.udf.exec( -# 'TileDB-Inc/soma_experiment_collection_mapper', -# soco_uri=soco_uri, -# measurement_name="RNA", -# X_layer_name="data", -# # callback = lambda x: x.obs.shape, -# # callback = lambda x: x, -# callback = lambda adata: [adata.obs.shape, adata.var.shape, adata.X.shape], -# # callback = lambda adata: 
adata.var, -# args_dict={}, -# reducer = lambda x: x, -# obs_attrs = ['obs_id', 'cell_type', 'is_primary_data'], -# var_attrs = ['var_id', 'means'], -# ) -# dag = executor.LocalExecutor(res, namespace = "TileDB-Inc") -# dag.visualize() -##%%time -# dag.execute() -# dag.wait() -# dag.node("output").result() - -# * Show, upgrade, resize From 873c5451fd0ba3e3c165747ca465a5b8e68c1ccc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 18:50:11 +0000 Subject: [PATCH 7/8] don't run irrelevant CI jobs when our YAML is modified --- .github/workflows/libtiledb-ci.yml | 1 + .github/workflows/python-ci-minimal.yml | 1 + .github/workflows/python-ci-packaging.yml | 2 ++ .github/workflows/python-dependency-variation.yml | 1 + 4 files changed, 5 insertions(+) diff --git a/.github/workflows/libtiledb-ci.yml b/.github/workflows/libtiledb-ci.yml index 9bbfa562fc..ae6b1731ae 100644 --- a/.github/workflows/libtiledb-ci.yml +++ b/.github/workflows/libtiledb-ci.yml @@ -6,6 +6,7 @@ on: - "apis/python/**" - "apis/r/**" - ".pre-commit-config.yaml" + - ".github/workflows/python-remote-storage.yml" push: branches: - main diff --git a/.github/workflows/python-ci-minimal.yml b/.github/workflows/python-ci-minimal.yml index 7168102607..dfc5958edc 100644 --- a/.github/workflows/python-ci-minimal.yml +++ b/.github/workflows/python-ci-minimal.yml @@ -20,6 +20,7 @@ on: - '!.github/**' - '.github/workflows/python-ci-minimal.yml' - '.github/workflows/python-ci-single.yml' + - ".github/workflows/python-remote-storage.yml" workflow_dispatch: jobs: diff --git a/.github/workflows/python-ci-packaging.yml b/.github/workflows/python-ci-packaging.yml index ebf3c9d9c3..310455045f 100644 --- a/.github/workflows/python-ci-packaging.yml +++ b/.github/workflows/python-ci-packaging.yml @@ -7,6 +7,7 @@ on: push: paths: - '.github/workflows/python-ci-packaging.yml' + - ".github/workflows/python-remote-storage.yml" - 'apis/python/MANIFEST.in' - 'apis/python/pyproject.toml' - 'apis/python/setup.py' @@ -21,6 +22,7 @@ on: pull_request: paths: - '.github/workflows/python-ci-packaging.yml' + - ".github/workflows/python-remote-storage.yml" - 'apis/python/MANIFEST.in' - 'apis/python/pyproject.toml' - 'apis/python/setup.py' diff --git a/.github/workflows/python-dependency-variation.yml b/.github/workflows/python-dependency-variation.yml index db6417013d..59d9cf7b4f 100644 --- a/.github/workflows/python-dependency-variation.yml +++ b/.github/workflows/python-dependency-variation.yml @@ -10,6 +10,7 @@ on: - '**.md' - 'apis/r/**' - 'docs/**' + - ".github/workflows/python-remote-storage.yml" workflow_dispatch: jobs: From 9b66f5b1f81377b20628541bde6a9c34e8ab2917 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 7 Feb 2025 15:55:10 -0500 Subject: [PATCH 8/8] typofix Co-authored-by: Julia Dark <24235303+jp-dark@users.noreply.github.com> --- apis/python/remote_tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apis/python/remote_tests/README.md b/apis/python/remote_tests/README.md index 9f76f8a4f6..ddb25b978d 100644 --- a/apis/python/remote_tests/README.md +++ b/apis/python/remote_tests/README.md @@ -38,7 +38,7 @@ aws s3 cp s3://tiledb-unittest/soma-prod-test-data/h5ad . Then use `tiledbsoma.io.from_h5ad` with the following sources and data: -* Preferr a bare Docker image +* Prefer a bare Docker image * Repeat for all desired TileDB-SOMA versions: * `pip install tiledbsoma==1.15.7` (or whichever version) * Ingest to `s3://tiledb-unittest/soma-prod-test-data/1.15.7/pbmc3k_unprocessed_1.15.7`