bids-standard · tsalo · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024
diff --git a/src/appendices/entity-table.md b/src/appendices/entity-table.md
@@ -12,7 +12,7 @@ For example, if a file has an acquisition and reconstruction label, the
 acquisition entity must precede the reconstruction entity.
 REQUIRED and OPTIONAL entities for a given file type are denoted;
 empty cells imply that entities MUST NOT be specified.
-Entity formats indicate whether the value is alphanumeric
+Entity formats indicate whether the value is alphanumeric (potentially including `+` character(s))
-Entity formats indicate whether the value is alphanumeric (potentially including `+` character(s))
+Entity formats indicate whether the value is alphanumeric (and possibly including `+` character(s))
-Entity formats indicate whether the value is alphanumeric (potentially including `+` character(s))
+Entity formats indicate whether the value is alphanumeric (and possibly including `+` character(s))
 (`<label>`) or numeric (`<index>`).
 
 A general introduction to entities is given in the section on

@@ -49,7 +49,7 @@ Each entity has the following attributes:
     1.  *Index*: A non-negative integer, potentially zero-padded for
         consistent width.
 
-    1.  *Label*: An alphanumeric string.
+    1.  *Label*: An alphanumeric string, possibly including `+` character(s).
         Note that labels MUST not collide when casing is ignored
         (see [Case collision intolerance](#case-collision-intolerance)).
 
@@ -1106,7 +1106,7 @@ A guide for using macros can be found at
 Additional files and directories containing raw data MAY be added as needed for
 special cases.
 All non-standard file entities SHOULD conform to BIDS-style naming conventions, including
-alphabetic entities and suffixes and alphanumeric labels/indices.
+alphabetic entities and suffixes and alphanumeric labels/indices (labels possibly including `+` character(s)).
 Non-standard suffixes SHOULD reflect the nature of the data, and existing
 entities SHOULD be used when appropriate.
 For example, an ASSET calibration scan might be named

@@ -110,7 +110,7 @@ derived_from:
     `sample-<label>` entity from which a sample is derived,
     for example a slice of tissue (`sample-02`) derived from a block of tissue (`sample-01`).
   type: string
-  pattern: ^sample-[0-9a-zA-Z]+$
+  pattern: ^sample-[0-9a-zA-Z+]+$
 desc_id:
   name: desc_id
   display_name: Description Label
@@ -125,7 +125,7 @@ desc_id:
     its `desc_id` column SHOULD contain all labels of the `desc` entity)
     used across the entire derivative dataset.
   type: string
-  pattern: ^desc-[0-9a-zA-Z]+$
+  pattern: ^desc-[0-9a-zA-Z+]+$
 description:
   name: description
   display_name: Description
@@ -369,7 +369,7 @@ participant_id:
     A participant identifier of the form `sub-<label>`,
     matching a participant entity found in the dataset.
   type: string
-  pattern: ^sub-[0-9a-zA-Z]+$
+  pattern: ^sub-[0-9a-zA-Z+]+$
 placement__motion:
   name: placement
   display_name: Placement
@@ -434,7 +434,7 @@ sample_id:
     A sample identifier of the form `sample-<label>`,
     matching a sample entity found in the dataset.
   type: string
-  pattern: ^sample-[0-9a-zA-Z]+$
+  pattern: ^sample-[0-9a-zA-Z+]+$
 sample_type:
   name: sample_type
   display_name: Sample type
@@ -466,7 +466,7 @@ session_id:
     A session identifier of the form `ses-<label>`,
     matching a session found in the dataset.
   type: string
-  pattern: ^ses-[0-9a-zA-Z]+$
+  pattern: ^ses-[0-9a-zA-Z+]+$
 sex:
   name: sex
   display_name: Sex

@@ -86,7 +86,8 @@ index:
 label:
   display_name: label
   description: |
-    An alphanumeric value, possibly prefixed with arbitrary number of 0s for consistent indentation,
+    An alphanumeric value (possibly including `+` character(s)), possibly prefixed with an arbitrary
+    number of 0s for consistent indentation,
     for example, it is `rest` in `task-rest` following `task-<label>` specification.
     Note that labels MUST not collide when casing is ignored
     (see [Case collision intolerance](SPEC_ROOT/common-principles.md#case-collision-intolerance)).

@@ -76,7 +76,7 @@ direction:
   name: dir
   display_name: Phase-Encoding Direction
   description: |
-    The `dir-<label>` entity can be set to an arbitrary alphanumeric label
+    The `dir-<label>` entity can be set to an arbitrary legitimate label
     (for example, `dir-LR` or `dir-AP`)
     to distinguish different phase-encoding directions.
 

@@ -9,8 +9,16 @@ index:
 label:
   display_name: Label
   description: |
-    Freeform labels without special characters.
-  pattern: '[0-9a-zA-Z]+'
+    Free-form labels with alphanumeric and plus (+) characters.
+
+    Plus signs MAY be used to concatenate multiple applicable labels,
+    but no relationship is established by a partial match.
+    In particular, the inheritance principle does not connect files
+    containing entities such as `<name>-x+y` with either `<name>-x` or `<name>-y`.
+    For example, metadata stored in a file at the root of the dataset with name `/acq-6p_T2w.json`
+    does not apply to files with partially matching "acquisition" entity values
+    such as `/sub-1/anat/sub-1_acq-6p+s2_T2w.nii`.
+  pattern: '[0-9a-zA-Z+]+'
 # Metadata types
 boolean:
   display_name: Boolean
@@ -59,7 +67,7 @@ dataset_relative:
     The validation for this format is minimal.
     It simply ensures that the value is a string with any characters that may appear in a valid path,
     without starting with "/" (an absolute path).
-  pattern: '(?!/)[0-9a-zA-Z/\_\-\.]+'
+  pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+'
 date:
   display_name: Date
   description: |
@@ -81,7 +89,7 @@ file_relative:
     The validation for this format is minimal.
     It simply ensures that the value is a string with any characters that may appear in a valid path,
     without starting with "/" (an absolute path).
-  pattern: '(?!/)[0-9a-zA-Z/\_\-\.]+'
+  pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+'
 participant_relative:
   display_name: Path relative to the participant directory
   description: |
@@ -91,7 +99,7 @@ participant_relative:
     It simply ensures that the value is a string with any characters that may appear in a valid path,
     without starting with "/" (an absolute path) or "sub/"
     (a relative path starting with the participant directory, rather than relative to that directory).
-  pattern: '(?!/)(?!sub-)[0-9a-zA-Z/\_\-\.]+'
+  pattern: '(?!/)(?!sub-)[0-9a-zA-Z+/\_\-\.]+'
 rrid:
   display_name: Research resource identifier
   description: |
@@ -106,7 +114,7 @@ stimuli_relative:
     It simply ensures that the value is a string with any characters that may appear in a valid path,
     without starting with "/" (an absolute path) or "stimuli/"
     (a relative path starting with the stimuli directory, rather than relative to that directory).
-  pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z/\_\-\.]+'
+  pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z+/\_\-\.]+'
 time:
   display_name: Time
   description: |

@@ -3676,9 +3676,10 @@ TaskName:
     Name of the task.
     No two tasks should have the same name.
     The task label included in the filename is derived from this `"TaskName"` field
-    by removing all non-alphanumeric characters (that is, all except those matching `[0-9a-zA-Z]`).
-    For example `"TaskName"` `"faces n-back"` or `"head nodding"` will correspond to task labels
-    `facesnback` and `headnodding`, respectively.
+    by optionally replacing spaces with `+` to ease readability,
+    and then removing all remaining characters other than alphanumerics and `+` (that is, all except those matching `[0-9a-zA-Z+]`).
+    For example `"TaskName"` `"faces n-back"` or `"head nodding"` could correspond to task labels
+    `faces+nback` or `facesnback` and `head+nodding` or `headnodding`, respectively.
   type: string
 TermURL:
   name: TermURL

@@ -1,7 +1,7 @@
 
 SUMMARY:
 0 out of 1 files were successfully validated, using the following regular expressions:
-	- `.*?/sub-(?P<subject>[0-9a-zA-Z]+)/(|ses-(?P<session>[0-9a-zA-Z]+)/)anat/sub-(?P=subject)(|_ses-(?P=session))(|_acq-(?P<acquisition>[0-9a-zA-Z]+))(|_ce-(?P<ceagent>[0-9a-zA-Z]+))(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))(|_run-(?P<run>[0-9a-zA-Z]+))(|_part-(?P<part>(mag|phase|real|imag)))_(T1w|T2w|PDw|T2starw|FLAIR|inplaneT1|inplaneT2|PDT2|angio|T2star)\.(nii.gz|nii|json)$`
+	- `.*?/sub-(?P<subject>[0-9a-zA-Z+]+)/(|ses-(?P<session>[0-9a-zA-Z+]+)/)anat/sub-(?P=subject)(|_ses-(?P=session))(|_acq-(?P<acquisition>[0-9a-zA-Z+]+))(|_ce-(?P<ceagent>[0-9a-zA-Z+]+))(|_rec-(?P<reconstruction>[0-9a-zA-Z+]+))(|_run-(?P<run>[0-9a-zA-Z+]+))(|_part-(?P<part>(mag|phase|real|imag)))_(T1w|T2w|PDw|T2starw|FLAIR|inplaneT1|inplaneT2|PDT2|angio|T2star)\.(nii.gz|nii|json)$`
 The following files were not matched by any regex schema entry:
 	* `/home/chymera/.data2/datalad/000026/noncompliant/sub-EXC022/anat/sub-EXC022_ses-MRI_flip-1_VFA.nii.gz
 The following mandatory regex schema entries did not match any files:
@@ -18,8 +18,8 @@ def test_entity_rule(schema_obj):
     nii_rule = rules._entity_rule(rule, schema_obj)
     assert nii_rule == {
         "regex": (
-            r"sub-(?P<subject>[0-9a-zA-Z]+)/"
-            r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
+            r"sub-(?P<subject>[0-9a-zA-Z+]+)/"
+            r"(?:ses-(?P<session>[0-9a-zA-Z+]+)/)?"
             r"(?P<datatype>anat)/"
             r"(?(subject)sub-(?P=subject)_)"
             r"(?(session)ses-(?P=session)_)"
@@ -50,8 +50,8 @@ def test_entity_rule(schema_obj):
     json_rule = rules._entity_rule(rule, schema_obj)
     assert json_rule == {
         "regex": (
-            r"(?:sub-(?P<subject>[0-9a-zA-Z]+)/)?"
-            r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
+            r"(?:sub-(?P<subject>[0-9a-zA-Z+]+)/)?"
+            r"(?:ses-(?P<session>[0-9a-zA-Z+]+)/)?"
             r"(?:(?P<datatype>anat)/)?"
             r"(?(subject)sub-(?P=subject)_)"
             r"(?(session)ses-(?P=session)_)"

@@ -72,12 +72,12 @@ def test_write_report(tmp_path):
 
     validation_result["schema_tracking"] = [
         {
-            "regex": ".*?/sub-(?P<subject>[0-9a-zA-Z]+)/"
-            "(|ses-(?P<session>[0-9a-zA-Z]+)/)anat/sub-(?P=subject)"
-            "(|_ses-(?P=session))(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
-            "(|_ce-(?P<ceagent>[0-9a-zA-Z]+))"
-            "(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
-            "(|_run-(?P<run>[0-9a-zA-Z]+))"
+            "regex": ".*?/sub-(?P<subject>[0-9a-zA-Z+]+)/"
+            "(|ses-(?P<session>[0-9a-zA-Z+]+)/)anat/sub-(?P=subject)"
+            "(|_ses-(?P=session))(|_acq-(?P<acquisition>[0-9a-zA-Z+]+))"
+            "(|_ce-(?P<ceagent>[0-9a-zA-Z+]+))"
+            "(|_rec-(?P<reconstruction>[0-9a-zA-Z+]+))"
+            "(|_run-(?P<run>[0-9a-zA-Z+]+))"
             "(|_part-(?P<part>(mag|phase|real|imag)))"
             "_(T1w|T2w|PDw|T2starw|FLAIR|inplaneT1|inplaneT2|PDT2|angio|T2star)"
             "\\.(nii.gz|nii|json)$",
@@ -86,12 +86,12 @@ def test_write_report(tmp_path):
     ]
     validation_result["schema_listing"] = [
         {
-            "regex": ".*?/sub-(?P<subject>[0-9a-zA-Z]+)/"
-            "(|ses-(?P<session>[0-9a-zA-Z]+)/)anat/sub-(?P=subject)"
-            "(|_ses-(?P=session))(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
-            "(|_ce-(?P<ceagent>[0-9a-zA-Z]+))"
-            "(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
-            "(|_run-(?P<run>[0-9a-zA-Z]+))"
+            "regex": ".*?/sub-(?P<subject>[0-9a-zA-Z+]+)/"
+            "(|ses-(?P<session>[0-9a-zA-Z+]+)/)anat/sub-(?P=subject)"
+            "(|_ses-(?P=session))(|_acq-(?P<acquisition>[0-9a-zA-Z+]+))"
+            "(|_ce-(?P<ceagent>[0-9a-zA-Z+]+))"
+            "(|_rec-(?P<reconstruction>[0-9a-zA-Z+]+))"
+            "(|_run-(?P<run>[0-9a-zA-Z+]+))"
             "(|_part-(?P<part>(mag|phase|real|imag)))"
             "_(T1w|T2w|PDw|T2starw|FLAIR|inplaneT1|inplaneT2|PDT2|angio|T2star)"
             "\\.(nii.gz|nii|json)$",