diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9c57935..d7b4f9c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## 1.9.0 2021-06-28
+
+* Provides a new ETL routine written in Java that will eventually replace all Jython scripts [(#85)](https://github.com/qbicsoftware/etl-scripts/pull/85)
+* Support for nf-core pipeline result registration [(#85)](https://github.com/qbicsoftware/etl-scripts/pull/85)
+* Provides metadata validation for imaging data (OMERO ETL) [(#83)](https://github.com/qbicsoftware/etl-scripts/pull/83)
+
 ## 1.8.0 2021-05-11
 
 * Add example Java dropbox
diff --git a/drop-boxes/register-all-dropbox/README.md b/drop-boxes/register-all-dropbox/README.md
new file mode 100644
index 0000000..20dbdf3
--- /dev/null
+++ b/drop-boxes/register-all-dropbox/README.md
@@ -0,0 +1,16 @@
+# New ETL logic written in Java
+
+Please find the source code of the ETL routine that this README refers to in the
+[Java openBIS dropboxes](https://github.com/qbicsoftware/java-openbis-dropboxes) GitHub repository.
+
+## Installation
+
+Please provide the Java binaries as a JAR from the [Java openBIS dropbox](https://github.com/qbicsoftware/java-openbis-dropboxes) in this
+repository's `./lib` folder.
+
+The DSS needs to be restarted in order to activate this dropbox.
+
+## ETL routine
+
+This dropbox expects a folder containing data and creates a new openBIS dataset from it. For more information,
+please visit the [Java openBIS dropbox](https://github.com/qbicsoftware/java-openbis-dropboxes) repository.
diff --git a/drop-boxes/register-all-dropbox/lib/README.md b/drop-boxes/register-all-dropbox/lib/README.md
new file mode 100644
index 0000000..8b06845
--- /dev/null
+++ b/drop-boxes/register-all-dropbox/lib/README.md
@@ -0,0 +1,2 @@
+Put the compiled Java binaries as JARs in this directory so that they can
+be loaded by the openBIS DSS class loader on DSS startup.
\ No newline at end of file
diff --git a/drop-boxes/register-all-dropbox/plugin.properties b/drop-boxes/register-all-dropbox/plugin.properties
new file mode 100644
index 0000000..f49d390
--- /dev/null
+++ b/drop-boxes/register-all-dropbox/plugin.properties
@@ -0,0 +1,12 @@
+#
+# Drop box for registering incoming data sets via the new Java ETL routine
+#
+
+incoming-data-completeness-condition = marker-file
+top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JavaTopLevelDataSetHandlerV2
+program-class = life.qbic.registration.MainETL
+storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
+# Variables:
+#   incoming-root-dir
+#   Path to the directory which contains incoming directories for drop boxes.
+incoming-dir = ${incoming-root-dir}/QBiC-register-all-data
diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py
index 6872776..ea51a45 100755
--- a/drop-boxes/register-omero-metadata/register-omero.py
+++ b/drop-boxes/register-omero-metadata/register-omero.py
@@ -21,6 +21,7 @@
 from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria
 from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria
 
+from life.qbic.utils import ImagingMetadataValidator
 
 
 #class OmeroError(Error):
@@ -58,7 +59,7 @@
 INCOMING_DATE_FORMAT = '%d.%m.%Y'
 OPENBIS_DATE_FORMAT = '%Y-%m-%d'
 
-PROPPERTY_FILTER_LIST = ["IMAGE_FILE_NAME", "INSTRUMENT_USER", "IMAGING_DATE"]
+PROPPERTY_FILTER_LIST = ["IMAGE_FILENAME", "INSTRUMENT_USER", "IMAGING_DATE"]
 
 def mapDateString(date_string):
     return datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT).strftime(OPENBIS_DATE_FORMAT)
@@ -177,7 +178,7 @@ def validatePropertyNames(property_names):
     """
 
     # fast validation without parser object.
-    required_names = ["IMAGE_FILE_NAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"]
+    required_names = ["IMAGE_FILENAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"]
 
     for name in required_names:
         if not name in property_names:
@@ -192,7 +193,7 @@ def getPropertyMap(line, property_names):
     properties = {}
     property_values = line.split("\t")
 
-    for i in range(1, len(property_names)): #exclude first col (filename)
+    for i in range(0, len(property_names)): #do not exclude first col (filename), the schema checks for it
         ##remove trailing newline, and replace space with underscore
         name = property_names[i].rstrip('\n').replace(" ", "_")
         value = property_values[i].rstrip('\n').replace(" ", "_")
@@ -201,6 +202,38 @@ def getPropertyMap(line, property_names):
     return properties
 
 
+def isFloat(value):
+    try:
+        float(value)
+        return True
+    except ValueError:
+        return False
+
+def isInt(value):
+    try:
+        int(value)
+        return True
+    except ValueError:
+        return False
+
+def getValidationMap(properties):
+    """Builds a map for property validation.
+    Lowercases the keys of the property map, and checks value types.
+    """
+
+    new_properties = {}
+    for key in properties.keys():
+
+        value = properties[key]
+        if isInt(value):
+            value = int(value)
+        elif isFloat(value):
+            value = float(value)
+
+        new_properties[key.lower()] = value
+
+    return new_properties
+
 def filterOmeroPropertyMap(property_map, filter_list):
     """Filters map before ingestion into omero server
 
@@ -317,6 +350,9 @@
         # 5. Additional metadata is provided in an own metadata TSV file.
        # We extract the metadata from this file.
         properties = getPropertyMap(line, property_names)
+
+        # 5.1 Validate metadata for image file
+        ImagingMetadataValidator.validateImagingProperties(getValidationMap(properties))
 
         #one file can have many images, iterate over all img ids
         for img_id in omero_image_ids:
@@ -343,4 +379,3 @@
 
     # 7. Last but not least we create the open science file format for images which is
     # OMERO-Tiff and store it in OMERO next to the proprierary vendor format.
-
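For reference, a minimal standalone sketch (not part of the patch itself) of what the helpers added to `register-omero.py` do before the properties reach `ImagingMetadataValidator.validateImagingProperties`: keys are lowercased and numeric strings are coerced to int or float. The example file name and property values below are illustrative only.

```python
# Illustrative sketch -- mirrors the isInt/isFloat/getValidationMap helpers from the patch above.
def isFloat(value):
    try:
        float(value)
        return True
    except ValueError:
        return False

def isInt(value):
    try:
        int(value)
        return True
    except ValueError:
        return False

def getValidationMap(properties):
    # Lowercase keys and coerce numeric strings so the validator sees typed values.
    new_properties = {}
    for key in properties.keys():
        value = properties[key]
        if isInt(value):
            value = int(value)
        elif isFloat(value):
            value = float(value)
        new_properties[key.lower()] = value
    return new_properties

# Hypothetical property map as parsed from one metadata TSV line:
example = {"IMAGE_FILENAME": "scan_01.czi", "INSTRUMENT_USER": "jdoe", "IMAGING_DATE": "2021-06-28"}
print(getValidationMap(example))
# e.g. {'image_filename': 'scan_01.czi', 'instrument_user': 'jdoe', 'imaging_date': '2021-06-28'}
```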