Skip to content

Commit

Permalink
fix(regex): Construct backreferences to ensure entity consistency (#1889
Browse files Browse the repository at this point in the history
)

* fix(regex): Construct backreferences to ensure entity consistency

* test(regex): Test the regexes more thoroughly
  • Loading branch information
effigies authored Aug 14, 2024
1 parent 0e506f0 commit 58e236e
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 6 deletions.
10 changes: 10 additions & 0 deletions tools/schemacode/bidsschematools/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ def _format_entity(entity, name, pattern, level, directory=False):
if directory and entity not in DIR_ENTITIES:
return ""

# For a directory entity appearing in a filename, match the value
# found in the directory name
# Note that this makes it impossible to do something like
# /ses-1_dwi.json instead of /sub-??/ses-1/sub-??_ses-1_dwi.json
# We'll assume this is a negligible use case.
# It is possible to create such a regular expression, but doing it
# in a piecewise fashion is difficult.
if not directory and entity in DIR_ENTITIES:
return rf"(?({entity}){name}-(?P={entity})_)"

label = _capture_regex(entity, pattern, not directory and entity in DIR_ENTITIES)
post = "/" if directory else "_"

Expand Down
37 changes: 31 additions & 6 deletions tools/schemacode/bidsschematools/tests/test_rules.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from bidsschematools import rules

from ..types import Namespace
Expand All @@ -13,19 +15,29 @@ def test_entity_rule(schema_obj):
"extensions": [".nii"],
}
)
assert rules._entity_rule(rule, schema_obj) == {
nii_rule = rules._entity_rule(rule, schema_obj)
assert nii_rule == {
"regex": (
r"sub-(?P<subject>[0-9a-zA-Z]+)/"
r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
r"(?P<datatype>anat)/"
r"sub-(?P=subject)_"
r"(?:ses-(?P=session)_)?"
r"(?(subject)sub-(?P=subject)_)"
r"(?(session)ses-(?P=session)_)"
r"(?P<suffix>T1w)"
r"(?P<extension>\.nii)\Z"
),
"mandatory": False,
}

assert re.match(nii_rule["regex"], "sub-01/anat/sub-01_T1w.nii")
assert re.match(nii_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/anat/sub-02_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/sub-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/ses-01/anat/sub-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/anat/sub-01_ses-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-02_T1w.nii")

# Sidecar entities are optional
rule = Namespace.build(
{
Expand All @@ -35,18 +47,31 @@ def test_entity_rule(schema_obj):
"extensions": [".json"],
}
)
assert rules._entity_rule(rule, schema_obj) == {
json_rule = rules._entity_rule(rule, schema_obj)
assert json_rule == {
"regex": (
r"(?:sub-(?P<subject>[0-9a-zA-Z]+)/)?"
r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
r"(?:(?P<datatype>anat)/)?"
r"(?:sub-(?P=subject)_)?"
r"(?:ses-(?P=session)_)?"
r"(?(subject)sub-(?P=subject)_)"
r"(?(session)ses-(?P=session)_)"
r"(?P<suffix>T1w)"
r"(?P<extension>\.json)\Z"
),
"mandatory": False,
}
assert re.match(json_rule["regex"], "sub-01/anat/sub-01_T1w.json")
assert re.match(json_rule["regex"], "sub-01/sub-01_T1w.json")
assert re.match(json_rule["regex"], "T1w.json")
assert re.match(json_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-01_T1w.json")
assert re.match(json_rule["regex"], "sub-01/ses-01/sub-01_ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/anat/sub-02_T1w.json")
assert not re.match(json_rule["regex"], "sub-01_T1w.json")
assert not re.match(json_rule["regex"], "ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/ses-01/anat/sub-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/anat/sub-01_ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/ses-01/ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-02_T1w.json")


def test_split_inheritance_rules():
Expand Down
36 changes: 36 additions & 0 deletions tools/schemacode/bidsschematools/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,42 @@ def test_inheritance_examples():
assert result["path_tracking"] == incorrect_inheritance


def test_regression_examples():
"""Tests that failed on pybids when switching to bst-regex-based validation"""
examples = [
"/sub-01/ses-ses/sub-01_dwi.bval", # redundant dir /ses-ses/
"/sub-01/01_dwi.bvec", # missed subject suffix
"/sub-01/sub_dwi.json", # missed subject id
"/sub-01/sub-01_23_run-01_dwi.bval", # wrong _23_
"/sub-01/sub-01_run-01_dwi.vec", # wrong extension
"/sub-01/sub-01_run-01_dwi.jsn", # wrong extension
"/sub-01/sub-01_acq_dwi.bval", # missed suffix value
"/sub-01/sub-01_acq-23-singleband_dwi.bvec", # redundant -23-
"/sub-01/anat/sub-01_acq-singleband_dwi.json", # redundant /anat/
"/sub-01/sub-01_recrod-record_acq-singleband_run-01_dwi.bval", # redundant record-record_
"/sub_01/sub-01_acq-singleband_run-01_dwi.bvec", # wrong /sub_01/
"/sub-01/sub-01_acq-singleband__run-01_dwi.json", # wrong __
"/sub-01/ses-test/sub-01_ses_test_dwi.bval", # wrong ses_test
"/sub-01/ses-test/sb-01_ses-test_dwi.bvec", # wrong sb-01
"/sub-01/ses-test/sub-01_ses-test_dw.json", # wrong modality
"/sub-01/ses-test/sub-01_ses-test_run-01_dwi.val", # wrong extension
"/sub-01/ses-test/sub-01_run-01_dwi.bvec", # missed session in the filename
# This validator adds a .*/ to the regex, so this will be a false negative
# If I cared about this validator, I would dig into it, but it doesn't seem worth it.
# -cjm 2024.08.14
# "/sub-01/ses-test/ses-test_run-01_dwi.json", # missed subject in the filename
"/sub-01/ses-test/sub-01_ses-test_acq-singleband.bval", # missed modality
"/sub-01/ses-test/sub-01_ses-test_acq-singleband_dwi", # missed extension
"/ses-test/sub-01/sub-01_ses-test_acq-singleband_dwi.json", # wrong dirs order
"/sub-01/ses-test/sub-02_ses-test_acq-singleband_run-01_dwi.bval", # wrong sub id
"/sub-01/sub-01_ses-test_acq-singleband_run-01_dwi.bvec", # ses dir missed
"/ses-test/sub-01_ses-test_acq-singleband_run-01_dwi.json", # sub id dir missed
]

result = validate_bids(examples, dummy_paths=True)
assert result["path_tracking"] == examples


def test_write_report(tmp_path):
from bidsschematools.validator import write_report

Expand Down

0 comments on commit 58e236e

Please sign in to comment.