-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add component to subset obsp * update changelog * update descriptions * fix tests * add comment
- Loading branch information
Showing
4 changed files
with
179 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
name: subset_obsp | ||
namespace: "filter" | ||
description: | | ||
Create a subset of an .obsp field in a mudata file, by filtering the columns based on the values of an .obs column. The resulting subset is moved to an .obsm slot. | ||
authors: | ||
- __merge__: /src/authors/dorien_roosen.yaml | ||
roles: [ author, maintainer ] | ||
argument_groups: | ||
- name: Input | ||
arguments: | ||
- name: "--input" | ||
type: file | ||
description: Input h5mu file | ||
direction: input | ||
required: true | ||
example: input.h5mu | ||
- name: "--modality" | ||
type: string | ||
default: "rna" | ||
required: false | ||
- name: "--input_obsp_key" | ||
type: string | ||
required: true | ||
description: The .obsp field to be filtered. | ||
- name: "--input_obs_key" | ||
type: string | ||
required: true | ||
description: The .obs column to filter on. | ||
- name: "--input_obs_value" | ||
type: string | ||
required: true | ||
description: The value to filter on in the .obs column. | ||
- name: Output | ||
arguments: | ||
- name: "--output" | ||
type: file | ||
description: Output h5mu file. | ||
direction: output | ||
example: output.h5mu | ||
- name: "--output_obsm_key" | ||
type: string | ||
required: true | ||
description: The .obsm key to store the subset in. | ||
- name: "--output_compression" | ||
type: string | ||
description: The compression format to be used on the output h5mu object. | ||
choices: ["gzip", "lzf"] | ||
required: false | ||
example: "gzip" | ||
|
||
resources: | ||
- type: python_script | ||
path: script.py | ||
- path: /src/utils/setup_logger.py | ||
test_resources: | ||
- type: python_script | ||
path: test.py | ||
- path: /resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu | ||
|
||
engines: | ||
- type: docker | ||
image: python:3.12-slim | ||
setup: | ||
- type: apt | ||
packages: | ||
- procps | ||
- type: python | ||
__merge__: /src/base/requirements/anndata_mudata.yaml | ||
__merge__: [ /src/base/requirements/python_test_setup.yaml, .] | ||
|
||
runners: | ||
- type: executable | ||
- type: nextflow | ||
directives: | ||
label: [singlecpu, lowmem] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import mudata as mu | ||
|
||
### VIASH START | ||
par = { | ||
'input': 'resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu', | ||
'modality': 'rna', | ||
'input_obsp_key': 'distances', | ||
'input_obs_key': 'leiden', | ||
'input_obs_value': '1', | ||
'output_obsm_key': "leiden_1", | ||
'output': 'subset_obsp_output.h5mu', | ||
'output_compression': None, | ||
} | ||
### VIASH END | ||
|
||
# START TEMPORARY WORKAROUND setup_logger | ||
# reason: resources aren't available when using Nextflow fusion | ||
# from setup_logger import setup_logger | ||
def setup_logger(): | ||
import logging | ||
from sys import stdout | ||
|
||
logger = logging.getLogger() | ||
logger.setLevel(logging.INFO) | ||
console_handler = logging.StreamHandler(stdout) | ||
logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") | ||
console_handler.setFormatter(logFormatter) | ||
logger.addHandler(console_handler) | ||
|
||
return logger | ||
# END TEMPORARY WORKAROUND setup_logger | ||
logger = setup_logger() | ||
|
||
def main(): | ||
logger.info(f"Reading {par['input']}") | ||
mdata = mu.read_h5mu(par["input"]) | ||
adata = mdata.mod[par["modality"]] | ||
|
||
logger.info(f"Subset columns of obsp matrix under {par['input_obsp_key']} based on {par['input_obs_key']} == {par['input_obs_value']}") | ||
# .obsp, .obs and .obsm index and .obsp columns all have a dimension length of `n_obs` | ||
# the index dimensions remain unaltered, but .obsp columns will be subset | ||
obsp = adata.obsp[par["input_obsp_key"]] | ||
idx = adata.obs[par["input_obs_key"]].astype(str) == par["input_obs_value"] | ||
obsm_subset = obsp[:, idx] | ||
|
||
logger.info(f"Writing subset obsp matrix to .obsm {par['output_obsm_key']}") | ||
adata.obsm[par["output_obsm_key"]] = obsm_subset | ||
|
||
logger.info(f"Writing output to {par['output']}") | ||
mdata.write_h5mu(par["output"], compression=par["output_compression"]) | ||
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import sys | ||
import pytest | ||
import mudata as mu | ||
|
||
## VIASH START | ||
meta = { | ||
'resources_dir': 'resources_test/pbmc_1k_protein_v3/' | ||
} | ||
## VIASH END | ||
|
||
|
||
@pytest.fixture | ||
def input_h5mu(): | ||
input = mu.read_h5mu(f"{meta['resources_dir']}/pbmc_1k_protein_v3_mms.h5mu") | ||
input.mod["rna"].obs["filter_column"] = "group_2" | ||
input.mod["rna"].obs["filter_column"][:50] = "group_1" | ||
return input | ||
|
||
|
||
@pytest.fixture | ||
def input_path(write_mudata_to_file, input_h5mu): | ||
return write_mudata_to_file(input_h5mu) | ||
|
||
|
||
def test_subset_obsp(input_path, run_component, tmp_path): | ||
output_path = tmp_path / "output.h5mu" | ||
|
||
# run component | ||
run_component([ | ||
"--input", input_path, | ||
"--output", str(output_path), | ||
"--input_obsp_key", "distances", | ||
"--input_obs_key", "filter_column", | ||
"--input_obs_value", "group_1", | ||
"--output_obsm_key", "group_1" | ||
]) | ||
|
||
assert output_path.is_file(), "Output file not found" | ||
|
||
# check output file | ||
mu_out = mu.read_h5mu(output_path) | ||
|
||
assert "group_1" in mu_out.mod["rna"].obsm, "Output should contain group_1 in .obsm" | ||
assert mu_out.mod["rna"].obsm["group_1"].shape[1] == 50, "Obsm should only contain a subset of the original obsp matrix" | ||
|
||
|
||
if __name__ == "__main__": | ||
sys.exit(pytest.main([__file__])) |