diff --git a/src/metaschema.json b/src/metaschema.json index a3e500dad7..5d01c8862f 100644 --- a/src/metaschema.json +++ b/src/metaschema.json @@ -751,6 +751,10 @@ "type": "object", "properties": { "level": { "enum": ["optional", "recommended", "required"] }, + "datatypes": { + "type": "array", + "items": { "pattern": "^[a-z]+$" } + }, "stem": { "type": "string" }, "extensions": { "type": "array", "items": { "type": "string" } } }, diff --git a/src/schema/rules/files/common/tables.yaml b/src/schema/rules/files/common/tables.yaml index 5a304dbfd7..21f03f6579 100644 --- a/src/schema/rules/files/common/tables.yaml +++ b/src/schema/rules/files/common/tables.yaml @@ -30,3 +30,16 @@ sessions: # This file may only exist if session is present in the dataset. - .json entities: subject: required + +# Phenotype is a special case where there are no applicable entities, but a +# parent directory is specified. This most closely matches datatype in the current +# structure. We also require a stem that can match any value, as there are no +# constraints on the filename except extension. 
+phenotype: + level: optional + datatypes: + - phenotype + stem: '*' + extensions: + - .tsv + - .json diff --git a/tools/schemacode/bidsschematools/conftest.py b/tools/schemacode/bidsschematools/conftest.py index 3154e3e5cf..2e5eaec86f 100644 --- a/tools/schemacode/bidsschematools/conftest.py +++ b/tools/schemacode/bidsschematools/conftest.py @@ -25,6 +25,7 @@ "qmri_tb1tfl", # fmap, _TB1TFL "qmri_vfa", # derivatives "ds000248", # .bidsignore + "fnirs_automaticity", # phenotypic ] # Errors are described in the README of the respective datasets: # https://github.com/bids-standard/bids-error-examples diff --git a/tools/schemacode/bidsschematools/data/tests/test_rules.py b/tools/schemacode/bidsschematools/data/tests/test_rules.py index 6c46a5da5d..39fce83a3a 100644 --- a/tools/schemacode/bidsschematools/data/tests/test_rules.py +++ b/tools/schemacode/bidsschematools/data/tests/test_rules.py @@ -87,6 +87,11 @@ def test_rule_objects(schema_obj): # Build a list of items mentioned in rules, but not found in objects. if use not in object_values: + if (use, object_type) == ("phenotype", "datatypes"): + # Special case: phenotype is a top-level directory + # that acts like a datatype, but we don't want to + # define it that way in the glossary, currently. + continue temp_path = path[:] if is_list: temp_path[-1] += f"[{i_use}]" diff --git a/tools/schemacode/bidsschematools/rules.py b/tools/schemacode/bidsschematools/rules.py index 22e1fce649..6004b0363b 100644 --- a/tools/schemacode/bidsschematools/rules.py +++ b/tools/schemacode/bidsschematools/rules.py @@ -4,6 +4,7 @@ ``schema.rules.files``. 
""" +import fnmatch import re import typing as ty from collections.abc import Mapping @@ -125,7 +126,7 @@ def _entity_rule(rule: Mapping, schema: bst.types.Namespace): ext_regex = f"(?P<extension>{ext_match})" return { - "regex": "".join(dir_regex + entity_regex + [suffix_regex, ext_regex]), + "regex": "".join(dir_regex + entity_regex + [suffix_regex, ext_regex, r"\Z"]), "mandatory": False, } @@ -170,15 +171,24 @@ def _sanitize_extension(ext: str) -> str: def _stem_rule(rule: bst.types.Namespace): - stem_regex = re.escape(rule.stem) + # translate includes a trailing \Z (end of string) but we expect extensions + stem_match = fnmatch.translate(rule.stem)[:-2] + stem_regex = f"(?P<stem>{stem_match})" + + dtypes = set(rule.get("datatypes", ())) + dir_regex = f"(?P<datatype>{'|'.join(dtypes)})/" if dtypes else "" + ext_match = "|".join(_sanitize_extension(ext) for ext in rule.extensions) - ext_regex = f"(?P<extension>{ext_match})" + ext_regex = rf"(?P<extension>{ext_match})\Z" - return {"regex": stem_regex + ext_regex, "mandatory": rule.level == "required"} + return {"regex": dir_regex + stem_regex + ext_regex, "mandatory": rule.level == "required"} def _path_rule(rule: bst.types.Namespace): - return {"regex": re.escape(rule.path), "mandatory": rule.level == "required"} + path_match = re.escape(rule.path) + # Exact path matches may be files or opaque directories + # Consider using rules.directories to identify opaque directories + return {"regex": rf"(?P<path>{path_match})(?:/.*)?\Z", "mandatory": rule.level == "required"} def regexify_filename_rules( diff --git a/tools/schemacode/bidsschematools/tests/test_rules.py b/tools/schemacode/bidsschematools/tests/test_rules.py index 693c4b24fe..35b167e199 100644 --- a/tools/schemacode/bidsschematools/tests/test_rules.py +++ b/tools/schemacode/bidsschematools/tests/test_rules.py @@ -21,7 +21,7 @@ def test_entity_rule(schema_obj): r"sub-(?P=subject)_" r"(?:ses-(?P=session)_)?" 
r"(?P<suffix>T1w)" - r"(?P<extension>\.nii)" + r"(?P<extension>\.nii)\Z" ), "mandatory": False, } @@ -43,7 +43,7 @@ def test_entity_rule(schema_obj): r"(?:sub-(?P=subject)_)?" r"(?:ses-(?P=session)_)?" r"(?P<suffix>T1w)" - r"(?P<extension>\.json)" + r"(?P<extension>\.json)\Z" ), "mandatory": False, } @@ -84,7 +84,7 @@ def test_split_inheritance_rules(): def test_stem_rule(): rule = Namespace.build({"stem": "README", "level": "required", "extensions": ["", ".md"]}) assert rules._stem_rule(rule) == { - "regex": r"README(?P<extension>|\.md)", + "regex": r"(?P<stem>(?s:README))(?P<extension>|\.md)\Z", "mandatory": True, } @@ -92,7 +92,21 @@ def test_stem_rule(): {"stem": "participants", "level": "optional", "extensions": [".tsv", ".json"]} ) assert rules._stem_rule(rule) == { - "regex": r"participants(?P<extension>\.tsv|\.json)", + "regex": r"(?P<stem>(?s:participants))(?P<extension>\.tsv|\.json)\Z", + "mandatory": False, + } + + # Wildcard stem, with datatype + rule = Namespace.build( + { + "stem": "*", + "datatypes": ["phenotype"], + "level": "optional", + "extensions": [".tsv", ".json"], + } + ) + assert rules._stem_rule(rule) == { + "regex": r"(?P<datatype>phenotype)/(?P<stem>(?s:.*))(?P<extension>\.tsv|\.json)\Z", "mandatory": False, } @@ -100,12 +114,12 @@ def test_stem_rule(): def test_path_rule(): rule = Namespace.build({"path": "dataset_description.json", "level": "required"}) assert rules._path_rule(rule) == { - "regex": r"dataset_description\.json", + "regex": r"(?P<path>dataset_description\.json)(?:/.*)?\Z", "mandatory": True, } rule = Namespace.build({"path": "LICENSE", "level": "optional"}) - assert rules._path_rule(rule) == {"regex": "LICENSE", "mandatory": False} + assert rules._path_rule(rule) == {"regex": r"(?P<path>LICENSE)(?:/.*)?\Z", "mandatory": False} def test_regexify_all():