Skip to content

Commit

Permalink
Merge branch 'kerl/python-remote-storage-ci' of github.com:single-cel…
Browse files Browse the repository at this point in the history
…l-data/TileDB-SOMA into kerl/python-remote-storage-ci
  • Loading branch information
johnkerl committed Feb 6, 2025
2 parents ab51c72 + 4101e7b commit 68d7314
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 37 deletions.
41 changes: 41 additions & 0 deletions apis/python/remote_tests/_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import datetime
import os
import pathlib
from typing import Tuple

import tiledb.cloud

# For cloud:
# * Create with timestamp
# * Delete on teardown
# For local:
# * Create without timestamp
# o Only remove the URI from a _previous_ run (if any)
# * Do not delete on teardown -- so developers can look at the data


def util_make_uri(dirname: str, basename: str) -> Tuple[str, str]:
    """Return the ``(creation_uri, readback_uri)`` pair for a remote test.

    Cloud mode (``TILEDB_SOMA_CLOUD_TEST_LOCAL_PATHS`` is unset):
        Both URIs are ``tiledb://`` URIs in the namespace of the current
        TileDB Cloud user, and the object name carries a timestamp suffix.
        The creation URI additionally embeds the user's default S3 path,
        ``dirname``, and the stamped ``basename``; the readback URI is just
        the namespace plus the stamped ``basename``.

    Local mode (``TILEDB_SOMA_CLOUD_TEST_LOCAL_PATHS`` is set to any value):
        Both URIs are the same un-timestamped path under
        ``/tmp/tiledbsoma-cloud-test``. Anything left at that path by a
        previous run is removed first, and the parent directory is created
        if needed.

    Args:
        dirname: Directory component placed between the storage root and
            ``basename``.
        basename: Name of the object to create (stamped in cloud mode).

    Returns:
        A ``(creation_uri, readback_uri)`` tuple of strings.
    """
    if os.getenv("TILEDB_SOMA_CLOUD_TEST_LOCAL_PATHS") is None:
        user_profile = tiledb.cloud.user_profile()
        namespace = user_profile.username
        # Contains the "s3://..." prefix and a trailing slash.
        # Note that double slashes can cause group-creation failures
        # so we need to carefully strip them out.
        bucket = user_profile.default_s3_path.rstrip("/")

        stamp = datetime.datetime.today().strftime("%Y%m%d-%H%M%S")
        creation_uri = f"tiledb://{namespace}/{bucket}/{dirname}/{basename}_{stamp}"
        readback_uri = f"tiledb://{namespace}/{basename}_{stamp}"
        return (creation_uri, readback_uri)

    # Local import: only the local-path branch needs it.
    import shutil

    uri = f"/tmp/tiledbsoma-cloud-test/{dirname}/{basename}"

    # Local URIs carry no timestamp, so a previous run's output sits at this
    # exact path. Remove it now (and only now -- teardown leaves it in place
    # so developers can inspect the data after a run).
    stale = pathlib.Path(uri)
    if stale.is_dir() and not stale.is_symlink():
        shutil.rmtree(stale)
    elif stale.exists() or stale.is_symlink():
        stale.unlink()

    pathlib.Path(os.path.dirname(uri)).mkdir(parents=True, exist_ok=True)
    return (uri, uri)


def util_tear_down_uri(uri):
    """Clean up the test object at ``uri`` when it lives on TileDB Cloud.

    URIs starting with ``tiledb://`` are deleted recursively via
    ``tiledb.cloud.groups.delete``. Any other URI is treated as local and
    left untouched.
    """
    is_cloud_uri = uri.startswith("tiledb://")
    if not is_cloud_uri:
        # Local URIs are deleted only on the _next_ run, so devs can inspect.
        return
    tiledb.cloud.groups.delete(uri=uri, recursive=True)
19 changes: 7 additions & 12 deletions apis/python/remote_tests/test_01_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# more complex things.
from __future__ import annotations

import datetime
import os
import sys

Expand All @@ -13,6 +12,8 @@
import tiledbsoma.io
import tiledb.cloud

from ._util import util_make_uri, util_tear_down_uri

if os.getenv("TILEDB_REST_TOKEN") is None:
pytest.skip(reason="$TILEDB_REST_TOKEN is not set", allow_module_level=True)

Expand All @@ -31,16 +32,10 @@ def test_basic_read():


def test_basic_write():
# xxx make helper function
user_profile = tiledb.cloud.user_profile()
namespace = user_profile.username
bucket = user_profile.default_s3_path

dirname = "soma-prod-ephemeral-data"
basename = "pbmc3k_unprocessed_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

creation_uri = f"tiledb://{namespace}/s3://{bucket}/{dirname}/{basename}"
readback_uri = f"tiledb://{namespace}/{basename}"
(creation_uri, readback_uri) = util_make_uri(
"soma-prod-ephemeral-data", "pbmc3k_unprocessed"
)
print("CREATION URI", creation_uri)

adata = scanpy.datasets.pbmc3k()

Expand All @@ -55,7 +50,7 @@ def test_basic_write():
assert "RNA" in exp.ms
assert exp.ms["RNA"].var.count == 32738

tiledb.cloud.groups.delete(uri=readback_uri, recursive=True)
util_tear_down_uri(readback_uri)


@pytest.mark.skipif(
Expand Down
44 changes: 19 additions & 25 deletions apis/python/remote_tests/test_02_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@
# more complex things.
from __future__ import annotations

import datetime
import os
import sys

import pandas as pd
import pytest
import scanpy as sc
import pandas as pd

import tiledbsoma
import tiledbsoma.io
import tiledbsoma.logging
import tiledb.cloud

from ._util import util_make_uri, util_tear_down_uri

if os.getenv("TILEDB_REST_TOKEN") is None:
pytest.skip(reason="$TILEDB_REST_TOKEN is not set", allow_module_level=True)

Expand All @@ -27,21 +27,13 @@
)



def test_write_with_updates():
user_profile = tiledb.cloud.user_profile()
namespace = user_profile.username
bucket = user_profile.default_s3_path.rstrip("/")
dirname = "soma-prod-ephemeral-data"
basename = "pbmc3k_analysis_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

creation_uri = f"tiledb://{namespace}/{bucket}/{dirname}/{basename}"
readback_uri = f"tiledb://{namespace}/{basename}"
#creation_uri = "/tmp/foo"
#readback_uri = "/tmp/foo"
#import os, shutil
#if os.path.exists(creation_uri):
# shutil.rmtree(creation_uri)
(creation_uri, readback_uri) = util_make_uri(
"soma-prod-ephemeral-data", "pbmc3k_analysis"
)
print()
print("CREATION URI", creation_uri)
print()

adata = sc.datasets.pbmc3k()

Expand All @@ -55,12 +47,15 @@ def test_write_with_updates():
with tiledbsoma.Experiment.open(readback_uri) as exp:
assert "RNA" in exp.ms

assert exp.metadata.get('dataset_type') == 'soma'
assert exp.metadata.get('soma_object_type') == 'SOMAExperiment'
assert exp.obs.metadata.get('soma_object_type') == 'SOMADataFrame'
assert exp.ms["RNA"].var.metadata.get('soma_object_type') == 'SOMADataFrame'
assert exp.metadata.get("dataset_type") == "soma"
assert exp.metadata.get("soma_object_type") == "SOMAExperiment"
assert exp.obs.metadata.get("soma_object_type") == "SOMADataFrame"
assert exp.ms["RNA"].var.metadata.get("soma_object_type") == "SOMADataFrame"
assert "data" in exp.ms["RNA"].X
assert exp.ms["RNA"].X["data"].metadata.get('soma_object_type') == 'SOMASparseNDArray'
assert (
exp.ms["RNA"].X["data"].metadata.get("soma_object_type")
== "SOMASparseNDArray"
)

assert exp.obs.count == adata.obs.shape[0]
assert exp.ms["RNA"].var.count == adata.var.shape[0]
Expand All @@ -72,7 +67,7 @@ def test_write_with_updates():
# Here we augment that with some on-the-fly computed data. This imitates a common customer workflow.
# Add a categorical column
parity = [["even", "odd"][e % 2] for e in range(len(adata.obs))]
adata.obs['parity'] = pd.Categorical(parity)
adata.obs["parity"] = pd.Categorical(parity)
with tiledbsoma.Experiment.open(creation_uri, "w") as exp:
tiledbsoma.io.update_obs(exp, adata.obs)

Expand All @@ -98,7 +93,6 @@ def test_write_with_updates():
sc.pp.scale(adata)
sc.tl.pca(adata, use_highly_variable=True, n_comps=5)

matrix = adata.obsm["X_pca"]
with tiledbsoma.open(creation_uri, "w") as exp:
tiledbsoma.io.add_matrix_to_collection(
exp=exp,
Expand All @@ -122,4 +116,4 @@ def test_write_with_updates():
with tiledbsoma.open(exp.uri) as exp:
assert sorted(list(exp.ms["RNA"].varm.keys())) == ["logcounts_pcs"]

tiledb.cloud.groups.delete(uri=readback_uri, recursive=True)
util_tear_down_uri(readback_uri)

0 comments on commit 68d7314

Please sign in to comment.