From afaebd5f6888401f71aeca130f0cae0f5b7dc9e3 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 19 Apr 2024 22:02:32 -0400 Subject: [PATCH 1/4] Preserve line ending while replacing in participants.tsv --- tools/schemacode/bidsschematools/migrations.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/schemacode/bidsschematools/migrations.py b/tools/schemacode/bidsschematools/migrations.py index 09848acbb9..684c5d2aa6 100644 --- a/tools/schemacode/bidsschematools/migrations.py +++ b/tools/schemacode/bidsschematools/migrations.py @@ -42,8 +42,11 @@ def migrate_participants(dataset_path: Path): os.rename(old_file, new_file) lgr.info(f" - renamed {old_file} to {new_file}") if ext == ".tsv": - migrated = new_file.read_text().replace("participant_id", "subject_id", 1) - new_file.write_text(migrated) + # Do manual .decode() and .encode() to avoid changing line endings + migrated = ( + new_file.read_bytes().decode().replace("participant_id", "subject_id", 1) + ) + new_file.write_bytes(migrated.encode()) lgr.info(f" - migrated content in {new_file}") From 114c68d9b160ea3d156a77d6cfa219b897f1972e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 19:26:56 -0400 Subject: [PATCH 2/4] Original copy of validate_datasets.yml from bids-examples --- .github/workflows/validate_bids-examples.yml | 90 ++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 .github/workflows/validate_bids-examples.yml diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml new file mode 100644 index 0000000000..0ed566aa65 --- /dev/null +++ b/.github/workflows/validate_bids-examples.yml @@ -0,0 +1,90 @@ +name: validate_datasets + +on: + push: + branches: ['**'] + pull_request: + branches: ['**'] + create: + branches: [master] + tags: ['**'] + schedule: + - cron: "0 4 * * 1" + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest, macos-latest, windows-latest] + bids-validator: [master, stable] + + runs-on: ${{ matrix.platform }} + + env: + TZ: Europe/Berlin + FORCE_COLOR: 1 + + steps: + - uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: 18 + + - name: Install BIDS validator (stable) + if: "matrix.bids-validator == 'stable'" + run: | + npm install -g bids-validator + + - name: Install BIDS validator (master) + if: "matrix.bids-validator == 'master'" + run: | + pushd .. + # Get npm 7+ + npm install -g npm + git clone --depth 1 https://github.com/bids-standard/bids-validator + cd bids-validator + # Generate the full development node_modules + npm clean-install + # Build & bundle the bids-validator CLI package + npm -w bids-validator run build + # Generate a package to install globally + npm -w bids-validator pack + # Install the package globally + bash -c "npm install -g bids-validator-*.tgz" + popd + + - name: Display versions and environment information + run: | + echo $TZ + date + echo "npm"; npm --version + echo "node"; node --version + echo "bids-validator"; bids-validator --version + + - name: Check that no large files are present + if: "matrix.bids-validator == 'stable'" + run: | + echo "Checking for big files ..." + found=`find . -not -path "./.git*" -type f -size +500k` + if [ "$found" == "" ] + then + echo "No big files present, great!" + else + echo "Found big files:" + echo "$found" + exit 1; + fi + shell: bash + + - name: Validate all BIDS datasets using bids-validator + run: | + cat ./run_tests.sh + bash ./run_tests.sh + shell: bash From 7af3d7387204075036b187296a678afbd5ea0e5c Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 12 Apr 2024 19:49:39 -0400 Subject: [PATCH 3/4] Add installation of bids-examples, migration of them, and testing --- .github/workflows/validate_bids-examples.yml | 115 +++++++++++-------- 1 file changed, 68 insertions(+), 47 deletions(-) diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml index 0ed566aa65..3d685fbe76 100644 --- a/.github/workflows/validate_bids-examples.yml +++ b/.github/workflows/validate_bids-examples.yml @@ -2,14 +2,14 @@ name: validate_datasets on: push: - branches: ['**'] + branches: ['master'] pull_request: branches: ['**'] - create: - branches: [master] - tags: ['**'] - schedule: - - cron: "0 4 * * 1" +# create: +# branches: [master] +# tags: ['**'] +# schedule: +# - cron: "0 4 * * 1" concurrency: group: ${{ github.ref }} @@ -20,8 +20,9 @@ jobs: strategy: fail-fast: false matrix: - platform: [ubuntu-latest, macos-latest, windows-latest] - bids-validator: [master, stable] + platform: [ubuntu-latest] # , macos-latest, windows-latest] + bids-validator: [master-deno] + python-version: ["3.11"] runs-on: ${{ matrix.platform }} @@ -32,59 +33,79 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Node.js - uses: actions/setup-node@v4 + # Setup Python with bst + - uses: actions/setup-python@v5 with: - node-version: 18 + python-version: ${{ matrix.python-version }} + - name: "Install build dependencies" + run: pip install --upgrade build twine + - name: "Build source distribution and wheel" + run: python -m build tools/schemacode + - name: "Check distribution metadata" + run: twine check tools/schemacode/dist/* + - name: "Install bst tools from the build" + run: pip install $( ls tools/schemacode/dist/*.whl )[all] + - name: "Produce dump of the schema as schema.json" + run: bst -v export --output src/schema.json - - name: Install BIDS validator (stable) - if: "matrix.bids-validator == 'stable'" - run: | - npm install -g bids-validator + - uses: denoland/setup-deno@v1.1.2 + if: "matrix.bids-validator == 'master-deno'" + with: + deno-version: v1.x - - name: Install BIDS validator (master) - if: "matrix.bids-validator == 'master'" + - name: Install BIDS validator (master deno build) + if: "matrix.bids-validator == 'master-deno'" run: | pushd .. - # Get npm 7+ - npm install -g npm + # Let's use specific commit for now + # TODO: progress it once in a while + commit=a7b291b882a8c6184219ccb84faae255ba96203a git clone --depth 1 https://github.com/bids-standard/bids-validator cd bids-validator - # Generate the full development node_modules - npm clean-install - # Build & bundle the bids-validator CLI package - npm -w bids-validator run build - # Generate a package to install globally - npm -w bids-validator pack - # Install the package globally - bash -c "npm install -g bids-validator-*.tgz" + git fetch --depth 1 origin $commit; + echo -e '#!/bin/sh\n'"$PWD/bids-validator/bids-validator-deno \"\$@\"" >| /usr/local/bin/bids-validator + chmod a+x /usr/local/bin/bids-validator + which -a bids-validator + bids-validator --help popd - name: Display versions and environment information run: | echo $TZ date - echo "npm"; npm --version - echo "node"; node --version - echo "bids-validator"; bids-validator --version + echo -n "npm: "; npm --version + echo -n "node: "; node --version + echo -n "bids-validator: "; bids-validator --version + echo -n "python: "; python --version - - name: Check that no large files are present - if: "matrix.bids-validator == 'stable'" - run: | - echo "Checking for big files ..." - found=`find . -not -path "./.git*" -type f -size +500k` - if [ "$found" == "" ] - then - echo "No big files present, great!" - else - echo "Found big files:" - echo "$found" - exit 1; - fi + # Checkout bids-examples + - uses: actions/checkout@v4 + with: + # repository: bids-standard/bids-examples + # For now use the forked repository with support for deno validator + # from https://github.com/bids-standard/bids-examples/pull/435 + repository: yarikoptic/bids-examples + ref: deno-validator + path: bids-examples + + - name: Mark known not yet to be deno-legit BIDS datasets + run: touch {ds000117,ds000246,ds000247,ds000248,eeg_ds003645s_hed_demo,ieeg_motorMiller2007,ieeg_visual}/.SKIP_VALIDATION shell: bash + working-directory: bids-examples - - name: Validate all BIDS datasets using bids-validator - run: | - cat ./run_tests.sh - bash ./run_tests.sh + - name: Validate using bids-validator without migration + run: ./run_tests.sh + working-directory: bids-examples + + - name: Migrate all BIDS datasets + run: /bin/ls */dataset_description.json | sed -e 's,/.*,,g' | xargs bst migrate-datasets shell: bash + working-directory: bids-examples + + - name: Show migrated datasets diff + run: git diff + working-directory: bids-examples + + - name: Validate all BIDS datasets using bids-validator after migration + run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh + working-directory: bids-examples From ba2755748d28cbe4ed4d851fc6eae615d1e916c0 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 19 Apr 2024 21:56:28 -0400 Subject: [PATCH 4/4] make migrate continue on non-bids, use git mv under git --- .github/workflows/validate_bids-examples.yml | 6 ++- .../schemacode/bidsschematools/migrations.py | 49 +++++++++++++++++-- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml index 3d685fbe76..7911ad5c82 100644 --- a/.github/workflows/validate_bids-examples.yml +++ b/.github/workflows/validate_bids-examples.yml @@ -103,9 +103,13 @@ jobs: working-directory: bids-examples - name: Show migrated datasets diff - run: git diff + run: git diff HEAD working-directory: bids-examples + # TODO: commit as a merge from current state of bids-examples + # and prior bids-2.0 branch there, but overloading with new updated + # state and recording commit hash of bids-specification used. + - name: Validate all BIDS datasets using bids-validator after migration run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh working-directory: bids-examples diff --git a/tools/schemacode/bidsschematools/migrations.py b/tools/schemacode/bidsschematools/migrations.py index 684c5d2aa6..1cf0441cf1 100644 --- a/tools/schemacode/bidsschematools/migrations.py +++ b/tools/schemacode/bidsschematools/migrations.py @@ -1,7 +1,11 @@ import json import os import re +import subprocess +from functools import lru_cache +from itertools import chain from pathlib import Path +from typing import Optional import bidsschematools as bst import bidsschematools.utils @@ -11,10 +15,14 @@ TARGET_VERSION = "2.0.0" +class NotBIDSDatasetError(Exception): + pass + + def get_bids_version(dataset_path: Path) -> str: dataset_description = dataset_path / "dataset_description.json" if not dataset_description.exists(): - raise ValueError(f"dataset_description.json not found in {dataset_path}") + raise NotBIDSDatasetError(f"dataset_description.json not found in {dataset_path}") return json.loads(dataset_description.read_text())["BIDSVersion"] @@ -39,7 +47,7 @@ def migrate_participants(dataset_path: Path): old_file = dataset_path / f"participants{ext}" new_file = dataset_path / f"subjects{ext}" if old_file.exists(): - os.rename(old_file, new_file) + rename_path(old_file, new_file) lgr.info(f" - renamed {old_file} to {new_file}") if ext == ".tsv": # Do manual .decode() and .encode() to avoid changing line endings @@ -53,8 +61,12 @@ def migrate_participants(dataset_path: Path): def migrate_dataset(dataset_path): lgr.info(f"Migrating dataset at {dataset_path}") dataset_path = Path(dataset_path) - if get_bids_version(dataset_path) == TARGET_VERSION: - lgr.info(f"Dataset already at version {TARGET_VERSION}") + try: + if get_bids_version(dataset_path) == TARGET_VERSION: + lgr.info(f"Dataset already at version {TARGET_VERSION}") + return + except NotBIDSDatasetError: + lgr.warning("%s not a BIDS dataset, skipping", dataset_path) return # TODO: possibly add a check for BIDS version in dataset_description.json # and skip if already 2.0, although ideally transformations @@ -65,3 +77,32 @@ def migrate_dataset(dataset_path): ]: lgr.info(f" - applying migration {migration.__name__}") migration(dataset_path) + + +@lru_cache +def path_has_git(path: Path) -> bool: + return (path / ".git").exists() + + +def git_topdir(path: Path) -> Optional[Path]: + """Return top-level directory of a git repository containing path, + or None if not under git.""" + path = path.absolute() + for p in chain([path] if path.is_dir() else [], path.parents): + if path_has_git(p): + return p + return None + + +def rename_path(old_path: Path, new_path: Path): + """git aware rename. If under git, use git mv, otherwise just os.rename.""" + # if under git, use git mv but ensure that on border + # crossing (should just use DataLad and `mv` and it would do the right thing!) + if (old_git_top := git_topdir(old_path)) != (new_git_top := git_topdir(new_path)): + raise NotImplementedError( + f"Did not implement moving across git repo boundaries {old_git_top} -> {new_git_top}" + ) + if old_git_top: + subprocess.run(["git", "mv", str(old_path), str(new_path)], check=True, cwd=old_git_top) + else: + os.rename(old_path, new_path)