Skip to content

Commit

Permalink
Merge pull request #1838 from effigies/rf/schema-column-definitions
Browse files Browse the repository at this point in the history
feat(schema): Provide default JSON column definition for "conventional" columns
  • Loading branch information
rwblair authored Jul 25, 2024
2 parents 4397361 + 40ca7bb commit 1d465f5
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 67 deletions.
13 changes: 12 additions & 1 deletion src/metaschema.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,18 @@
"patternProperties": {
"^[a-zA-Z0-9_]+$": {
"allOf": [
{ "$ref": "#/definitions/termTypes/JSONSchema" },
{
"anyOf": [
{ "$ref": "#/definitions/termTypes/JSONSchema" },
{
"type": "object",
"properties": {
"definition": { "type": "object" }
},
"required": ["definition"]
}
]
},
{ "$ref": "#/definitions/termTypes/general" },
{ "$ref": "#/definitions/termTypes/nameValue" },
{
Expand Down
115 changes: 69 additions & 46 deletions src/schema/objects/columns.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,20 @@ age:
It is recommended to tag participant ages that are 89 or higher as 89+,
for privacy purposes.
type: number
unit: year
definition: {
"LongName": "Subject age",
"Description": "Subject age in postnatal years",
"Units": "year",
}
cardiac:
name: cardiac
display_name: Cardiac measurement
description: |
continuous pulse measurement
type: number
definition: {
"Description": "continuous pulse measurement",
"Units": "mV"
}
channel:
name: channel
display_name: Channel
Expand Down Expand Up @@ -185,6 +191,27 @@ group__channel:
handedness:
name: handedness
display_name: Subject handedness
definition: {
"LongName": "Subject handedness",
"Description": "String value indicating one of \"left\", \"right\", or \"ambidextrous\".",
"Levels": {
"left": "Left-handed",
"l": "Left-handed",
"L": "Left-handed",
"LEFT": "Left-handed",
"Left": "Left-handed",
"right": "Right-handed",
"r": "Right-handed",
"R": "Right-handed",
"RIGHT": "Right-handed",
"Right": "Right-handed",
"ambidextrous": "Ambidextrous",
"a": "Ambidextrous",
"A": "Ambidextrous",
"AMBIDEXTROUS": "Ambidextrous",
"Ambidextrous": "Ambidextrous",
}
}
description: |
String value indicating one of "left", "right", "ambidextrous".
Expand All @@ -194,25 +221,6 @@ handedness:
For "ambidextrous", use one of these values: `ambidextrous`, `a`, `A`, `AMBIDEXTROUS`,
`Ambidextrous`.
type: string
# TODO: Add definitions for these values. (perhaps don't specify)
enum:
- left
- l
- L
- LEFT
- Left
- right
- r
- R
- RIGHT
- Right
- ambidextrous
- a
- A
- AMBIDEXTROUS
- Ambidextrous
- n/a
hemisphere:
name: hemisphere
display_name: Electrode hemisphere
Expand Down Expand Up @@ -368,7 +376,9 @@ pathology:
When different from `healthy`, pathology SHOULD be specified.
The pathology may be specified in either `samples.tsv` or
`sessions.tsv`, depending on whether the pathology changes over time.
type: string
definition: {
"Description": "Description of the pathology of the sample or type of control."
}
participant_id:
name: participant_id
display_name: Participant ID
Expand Down Expand Up @@ -429,7 +439,10 @@ respiratory:
display_name: Respiratory measurement
description: |
continuous breathing measurement
type: number
definition: {
"Description": "continuous measurements by respiration belt",
"Units": "mV"
}
response_time:
name: response_time
display_name: Response time
Expand Down Expand Up @@ -494,25 +507,27 @@ sex:
For "female", use one of these values: `female`, `f`, `F`, `FEMALE`, `Female`.
For "other", use one of these values: `other`, `o`, `O`, `OTHER`, `Other`.
type: string
# TODO: Add definitions for these values. (perhaps don't specify)
enum:
- male
- m
- M
- MALE
- Male
- female
- f
- F
- FEMALE
- Female
- other
- o
- O
- OTHER
- Other
- n/a
definition: {
"LongName": "sex",
"Description": "String value indicating phenotypical sex.",
"Levels": {
"F": "Female",
"FEMALE": "Female",
"Female": "Female",
"f": "Female",
"female": "Female",
"M": "Male",
"MALE": "Male",
"Male": "Male",
"m": "Male",
"male": "Male",
"O": "Other",
"OTHER": "Other",
"Other": "Other",
"o": "Other",
"other": "Other",
}
}
short_channel:
name: short_channel
display_name: Short Channel
Expand Down Expand Up @@ -568,7 +583,10 @@ species:
(for example, `homo sapiens`, `mus musculus`, `rattus norvegicus`).
For backwards compatibility, if `species` is absent, the participant is assumed to be
`homo sapiens`.
type: string
definition: {
"Description":
"binomial species name from the NCBI Taxonomy (https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi)"
}
status:
name: status
display_name: Channel status
Expand Down Expand Up @@ -609,7 +627,9 @@ strain:
description: |
For species different from `homo sapiens`, string value indicating
the strain of the species, for example: `C57BL/6J`.
type: string
definition: {
"Description": "name of the strain of the species"
}
strain_rrid:
name: strain_rrid
display_name: Strain RRID
Expand Down Expand Up @@ -649,7 +669,10 @@ trigger:
display_name: Trigger
description: |
continuous measurement of the scanner trigger signal
type: number
definition: {
"Description": "continuous measurement of the scanner trigger signal",
"Units": "arbitrary"
}
# type column in channels.tsv files
type__channels:
name: type
Expand Down
16 changes: 8 additions & 8 deletions tools/schemacode/bidsschematools/render/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ def _make_object_table(
"columns": "column",
}.get(table_type)

for element in subschema.keys():
field_name = subschema[element]["name"]
for element, field in subschema.items():
field_name = field["name"]
# NOTE: Link to the glossary entry,
# except for subobjects (if table_type) and
# "additional columns" (field_name.startswith("**"))
Expand All @@ -90,19 +90,19 @@ def _make_object_table(
"[DEPRECATED](SPEC_ROOT/common-principles.md#definitions)",
)

type_string = utils.resolve_metadata_type(subschema[element])
type_string = utils.resolve_metadata_type(field)

description = utils.normalize_requirements(
subschema[element]["description"] + " " + description_addendum
f"{field['description']} {description_addendum}".strip()
)

# Append a list of valid values, if provided, to the description.
# If there are a lot of valid values, this will add a link to the description linking to
# the associated glossary entry.
if (
"enum" in subschema[element].keys()
and len(subschema[element]["enum"]) >= n_values_to_combine
):
levels = subschema[element].get("enum", []) or subschema[element].get(
"definition", {}
).get("Levels", [])
if len(levels) >= n_values_to_combine:
glossary_entry = f"{GLOSSARY_PATH}.md#objects.{table_type}.{element}"
valid_values_str = (
f"For a list of valid values for this {element_type}, see the "
Expand Down
24 changes: 13 additions & 11 deletions tools/schemacode/bidsschematools/render/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,17 +175,19 @@ def make_glossary(schema, src_path=None):
elif obj["type"] == "format":
text += f"**Regular expression**: `{obj_def['pattern']}`\n\n"

keys_to_drop = ["description", "display_name", "name", "value", "pattern"]
if "enum" in obj_def.keys():
allowed_values = []
keys_to_drop.append("enum")
for value in obj_def["enum"]:
if isinstance(value, str):
allowed_values.append(value)
else:
allowed_values.append(value["name"])

text += f"**Allowed values**: `{'`, `'.join(allowed_values)}`\n\n"
keys_to_drop = [
"description",
"display_name",
"name",
"value",
"pattern",
"enum",
"definition",
]
levels = list(obj_def.get("enum", []) or obj_def.get("definition", {}).get("Levels", {}))
if levels:
levels = [level["name"] if isinstance(level, dict) else level for level in levels]
text += f"**Allowed values**: `{'`, `'.join(levels)}`\n\n"

text += f"**Description**:\n{obj_desc}\n\n"

Expand Down
25 changes: 24 additions & 1 deletion tools/schemacode/bidsschematools/render/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,21 @@ def resolve_metadata_type(definition):

string = " or ".join(substrings)

elif "definition" in definition:
json_def = definition["definition"]

if "Delimiter" in json_def:
# Delimiter indicates the value must be parsed. For BIDS purposes,
# this is a string, even if the parsed array is of numbers.
string = "string"
elif "Levels" in json_def:
# JSON keys are always strings.
string = "string"
elif "Units" in json_def:
# Values with units are always (any exceptions?) numbers.
string = "number"
else:
string = "string or number"
else:
# This clause should only catch $refs.
# The schema should be dereferenced by this point, so $refs should not exist.
Expand All @@ -246,7 +261,15 @@ def describe_valid_values(definition):
str : A sentence describing valid values for the object.
"""
description = ""
if "anyOf" in definition.keys():
if "anyOf" in definition:
return description

if "definition" in definition:
levels = definition["definition"].get("Levels")
if levels:
description = (
f"Unless redefined in a sidecar file, must be one of: {', '.join(levels)}."
)
return description

if definition["type"] == "boolean":
Expand Down

0 comments on commit 1d465f5

Please sign in to comment.