From 74eda56cc7c4483e6516963659c192389f09a825 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 25 Jul 2024 12:52:11 -0400 Subject: [PATCH 1/3] feat(metaschema): Allow column definition object as an alternative to JSON Schema --- src/metaschema.json | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/metaschema.json b/src/metaschema.json index ce500d6418..c0462edcaf 100644 --- a/src/metaschema.json +++ b/src/metaschema.json @@ -82,7 +82,18 @@ "patternProperties": { "^[a-zA-Z0-9_]+$": { "allOf": [ - { "$ref": "#/definitions/termTypes/JSONSchema" }, + { + "anyOf": [ + { "$ref": "#/definitions/termTypes/JSONSchema" }, + { + "type": "object", + "properties": { + "definition": { "type": "object" } + }, + "required": ["definition"] + } + ] + }, { "$ref": "#/definitions/termTypes/general" }, { "$ref": "#/definitions/termTypes/nameValue" }, { From 8fdfdb98745194215964811bc554f4fb3260b4f4 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 25 Jul 2024 12:53:14 -0400 Subject: [PATCH 2/3] feat(schema): Provide default JSON column definitions --- src/schema/objects/columns.yaml | 115 +++++++++++++++++++------------- 1 file changed, 69 insertions(+), 46 deletions(-) diff --git a/src/schema/objects/columns.yaml b/src/schema/objects/columns.yaml index 9128a235ba..d93145430e 100644 --- a/src/schema/objects/columns.yaml +++ b/src/schema/objects/columns.yaml @@ -40,14 +40,20 @@ age: It is recommended to tag participant ages that are 89 or higher as 89+, for privacy purposes. - type: number - unit: year + definition: { + "LongName": "Subject age", + "Description": "Subject age in postnatal years", + "Units": "year", + } cardiac: name: cardiac display_name: Cardiac measurement description: | continuous pulse measurement - type: number + definition: { + "Description": "continuous pulse measurement", + "Units": "mV" + } channel: name: channel display_name: Channel @@ -185,6 +191,27 @@ group__channel: handedness: name: handedness display_name: Subject handedness + definition: { + "LongName": "Subject handedness", + "Description": "String value indicating one of \"left\", \"right\", or \"ambidextrous\".", + "Levels": { + "left": "Left-handed", + "l": "Left-handed", + "L": "Left-handed", + "LEFT": "Left-handed", + "Left": "Left-handed", + "right": "Right-handed", + "r": "Right-handed", + "R": "Right-handed", + "RIGHT": "Right-handed", + "Right": "Right-handed", + "ambidextrous": "Ambidextrous", + "a": "Ambidextrous", + "A": "Ambidextrous", + "AMBIDEXTROUS": "Ambidextrous", + "Ambidextrous": "Ambidextrous", + } + } description: | String value indicating one of "left", "right", "ambidextrous". @@ -194,25 +221,6 @@ handedness: For "ambidextrous", use one of these values: `ambidextrous`, `a`, `A`, `AMBIDEXTROUS`, `Ambidextrous`. - type: string - # TODO: Add definitions for these values. (perhaps don't specify) - enum: - - left - - l - - L - - LEFT - - Left - - right - - r - - R - - RIGHT - - Right - - ambidextrous - - a - - A - - AMBIDEXTROUS - - Ambidextrous - - n/a hemisphere: name: hemisphere display_name: Electrode hemisphere @@ -368,7 +376,9 @@ pathology: When different from `healthy`, pathology SHOULD be specified. The pathology may be specified in either `samples.tsv` or `sessions.tsv`, depending on whether the pathology changes over time. - type: string + definition: { + "Description": "Description of the pathology of the sample or type of control." + } participant_id: name: participant_id display_name: Participant ID @@ -429,7 +439,10 @@ respiratory: display_name: Respiratory measurement description: | continuous breathing measurement - type: number + definition: { + "Description": "continuous measurements by respiration belt", + "Units": "mV" + } response_time: name: response_time display_name: Response time @@ -494,25 +507,27 @@ sex: For "female", use one of these values: `female`, `f`, `F`, `FEMALE`, `Female`. For "other", use one of these values: `other`, `o`, `O`, `OTHER`, `Other`. - type: string - # TODO: Add definitions for these values. (perhaps don't specify) - enum: - - male - - m - - M - - MALE - - Male - - female - - f - - F - - FEMALE - - Female - - other - - o - - O - - OTHER - - Other - - n/a + definition: { + "LongName": "sex", + "Description": "String value indicating phenotypical sex.", + "Levels": { + "F": "Female", + "FEMALE": "Female", + "Female": "Female", + "f": "Female", + "female": "Female", + "M": "Male", + "MALE": "Male", + "Male": "Male", + "m": "Male", + "male": "Male", + "O": "Other", + "OTHER": "Other", + "Other": "Other", + "o": "Other", + "other": "Other", + } + } short_channel: name: short_channel display_name: Short Channel @@ -568,7 +583,10 @@ species: (for example, `homo sapiens`, `mus musculus`, `rattus norvegicus`). For backwards compatibility, if `species` is absent, the participant is assumed to be `homo sapiens`. - type: string + definition: { + "Description": + "binomial species name from the NCBI Taxonomy (https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi)" + } status: name: status display_name: Channel status @@ -609,7 +627,9 @@ strain: description: | For species different from `homo sapiens`, string value indicating the strain of the species, for example: `C57BL/6J`. - type: string + definition: { + "Description": "name of the strain of the species" + } strain_rrid: name: strain_rrid display_name: Strain RRID @@ -649,7 +669,10 @@ trigger: display_name: Trigger description: | continuous measurement of the scanner trigger signal - type: number + definition: { + "Description": "continuous measurement of the scanner trigger signal", + "Units": "arbitrary" + } # type column in channels.tsv files type__channels: name: type From 40ca7bbd2f9bd2d981df29707d8ff9f3e9ab78de Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 25 Jul 2024 12:54:32 -0400 Subject: [PATCH 3/3] feat(spec): Render definition and enum columns alike --- .../bidsschematools/render/tables.py | 16 ++++++------ .../schemacode/bidsschematools/render/text.py | 24 ++++++++++-------- .../bidsschematools/render/utils.py | 25 ++++++++++++++++++- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/tools/schemacode/bidsschematools/render/tables.py b/tools/schemacode/bidsschematools/render/tables.py index 9366ae5725..31b6e13fd0 100644 --- a/tools/schemacode/bidsschematools/render/tables.py +++ b/tools/schemacode/bidsschematools/render/tables.py @@ -74,8 +74,8 @@ def _make_object_table( "columns": "column", }.get(table_type) - for element in subschema.keys(): - field_name = subschema[element]["name"] + for element, field in subschema.items(): + field_name = field["name"] # NOTE: Link to the glossary entry, # except for subobjects (if table_type) and # "additional columns" (field_name.startswith("**")) @@ -90,19 +90,19 @@ def _make_object_table( "[DEPRECATED](SPEC_ROOT/common-principles.md#definitions)", ) - type_string = utils.resolve_metadata_type(subschema[element]) + type_string = utils.resolve_metadata_type(field) description = utils.normalize_requirements( - subschema[element]["description"] + " " + description_addendum + f"{field['description']} {description_addendum}".strip() ) # Append a list of valid values, if provided, to the description. # If there are a lot of valid values, this will add a link to the description linking to # the associated glossary entry. - if ( - "enum" in subschema[element].keys() - and len(subschema[element]["enum"]) >= n_values_to_combine - ): + levels = subschema[element].get("enum", []) or subschema[element].get( + "definition", {} + ).get("Levels", []) + if len(levels) >= n_values_to_combine: glossary_entry = f"{GLOSSARY_PATH}.md#objects.{table_type}.{element}" valid_values_str = ( f"For a list of valid values for this {element_type}, see the " diff --git a/tools/schemacode/bidsschematools/render/text.py b/tools/schemacode/bidsschematools/render/text.py index f26ba73204..ce70ae0ded 100644 --- a/tools/schemacode/bidsschematools/render/text.py +++ b/tools/schemacode/bidsschematools/render/text.py @@ -175,17 +175,19 @@ def make_glossary(schema, src_path=None): elif obj["type"] == "format": text += f"**Regular expression**: `{obj_def['pattern']}`\n\n" - keys_to_drop = ["description", "display_name", "name", "value", "pattern"] - if "enum" in obj_def.keys(): - allowed_values = [] - keys_to_drop.append("enum") - for value in obj_def["enum"]: - if isinstance(value, str): - allowed_values.append(value) - else: - allowed_values.append(value["name"]) - - text += f"**Allowed values**: `{'`, `'.join(allowed_values)}`\n\n" + keys_to_drop = [ + "description", + "display_name", + "name", + "value", + "pattern", + "enum", + "definition", + ] + levels = list(obj_def.get("enum", []) or obj_def.get("definition", {}).get("Levels", {})) + if levels: + levels = [level["name"] if isinstance(level, dict) else level for level in levels] + text += f"**Allowed values**: `{'`, `'.join(levels)}`\n\n" text += f"**Description**:\n{obj_desc}\n\n" diff --git a/tools/schemacode/bidsschematools/render/utils.py b/tools/schemacode/bidsschematools/render/utils.py index 873699058e..97b3eefa0f 100644 --- a/tools/schemacode/bidsschematools/render/utils.py +++ b/tools/schemacode/bidsschematools/render/utils.py @@ -222,6 +222,21 @@ def resolve_metadata_type(definition): string = " or ".join(substrings) + elif "definition" in definition: + json_def = definition["definition"] + + if "Delimiter" in json_def: + # Delimiter indicates the value must be parsed. For BIDS purposes, + # this is a string, even if the parsed array is of numbers. + string = "string" + elif "Levels" in json_def: + # JSON keys are always strings. + string = "string" + elif "Units" in json_def: + # Values with units are always (any exceptions?) numbers. + string = "number" + else: + string = "string or number" else: # This clause should only catch $refs. # The schema should be deferenced by this point, so $refs should not exist. @@ -246,7 +261,15 @@ def describe_valid_values(definition): str : A sentence describing valid values for the object. """ description = "" - if "anyOf" in definition.keys(): + if "anyOf" in definition: + return description + + if "definition" in definition: + levels = definition["definition"].get("Levels") + if levels: + description = ( + f"Unless redefined in a sidecar file, must be one of: {', '.join(levels)}." + ) return description if definition["type"] == "boolean":