Skip to content

Commit

Permalink
Release 1.9.0
Browse files Browse the repository at this point in the history
Release 1.9.0
  • Loading branch information
sven1103 authored Jun 28, 2021
2 parents b86b358 + 1f6b8b3 commit 2e27646
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 1.9.0 2021-06-28

* Provides a new ETL routine written in Java that will replace all Jython scripts at some point [(#85)](https://github.com/qbicsoftware/etl-scripts/pull/85)
* Support for nf-core pipeline result registration [(#85)](https://github.com/qbicsoftware/etl-scripts/pull/85)
* Provides metadata validation for imaging data (OMERO ETL). [(#83)](https://github.com/qbicsoftware/etl-scripts/pull/83)

## 1.8.0 2021-05-11

* Add example Java dropbox
Expand Down
16 changes: 16 additions & 0 deletions drop-boxes/register-all-dropbox/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# New ETL logic written in Java

Please find the source code of the ETL routine that this article refers to in the
[Java openBIS dropboxes](https://github.com/qbicsoftware/java-openbis-dropboxes) GitHub repository.

## Installation

Please provide the Java binaries as a JAR from the [Java openBIS dropbox](https://github.com/qbicsoftware/java-openbis-dropboxes) in this repository's
`./lib` folder.

The DSS needs to be restarted in order to activate this dropbox.

## ETL routine

This dropbox expects a folder containing data and creates a new openBIS data set from it. For more information,
please visit [Java openBIS dropbox](https://github.com/qbicsoftware/java-openbis-dropboxes).
2 changes: 2 additions & 0 deletions drop-boxes/register-all-dropbox/lib/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Put the compiled Java binaries as JARs in this directory in order
to be loaded by the openBIS DSS class loader on DSS startup.
12 changes: 12 additions & 0 deletions drop-boxes/register-all-dropbox/plugin.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# Drop box for registering a folder of incoming data as a new openBIS data set
#

incoming-data-completeness-condition = marker-file
top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JavaTopLevelDataSetHandlerV2
program-class = life.qbic.registration.MainETL
storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
# Variables:
# incoming-root-dir
# Path to the directory which contains incoming directories for drop boxes.
incoming-dir = ${incoming-root-dir}/QBiC-register-all-data
43 changes: 39 additions & 4 deletions drop-boxes/register-omero-metadata/register-omero.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria

from life.qbic.utils import ImagingMetadataValidator

#class OmeroError(Error):

Expand Down Expand Up @@ -58,7 +59,7 @@
INCOMING_DATE_FORMAT = '%d.%m.%Y'
OPENBIS_DATE_FORMAT = '%Y-%m-%d'

PROPPERTY_FILTER_LIST = ["IMAGE_FILE_NAME", "INSTRUMENT_USER", "IMAGING_DATE"]
PROPPERTY_FILTER_LIST = ["IMAGE_FILENAME", "INSTRUMENT_USER", "IMAGING_DATE"]

def mapDateString(date_string):
    """Convert a date string from the incoming format to the openBIS format.

    The input is parsed with INCOMING_DATE_FORMAT and re-rendered with
    OPENBIS_DATE_FORMAT. ``datetime.datetime.strptime`` raises a ValueError
    if ``date_string`` does not match the incoming format.
    """
    parsed_date = datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT)
    return parsed_date.strftime(OPENBIS_DATE_FORMAT)
Expand Down Expand Up @@ -177,7 +178,7 @@ def validatePropertyNames(property_names):
"""

# fast validation without parser object.
required_names = ["IMAGE_FILE_NAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"]
required_names = ["IMAGE_FILENAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"]

for name in required_names:
if not name in property_names:
Expand All @@ -192,7 +193,7 @@ def getPropertyMap(line, property_names):
properties = {}
property_values = line.split("\t")

for i in range(1, len(property_names)): #exclude first col (filename)
for i in range(0, len(property_names)): #do not exclude first col (filename), the schema checks for it
##remove trailing newline, and replace space with underscore
name = property_names[i].rstrip('\n').replace(" ", "_")
value = property_values[i].rstrip('\n').replace(" ", "_")
Expand All @@ -201,6 +202,38 @@ def getPropertyMap(line, property_names):

return properties

def isFloat(value):
    """Tell whether ``value`` can be converted with ``float()``.

    Returns True if ``float(value)`` succeeds and False otherwise. The
    conversion result itself is discarded; callers convert again if needed.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number (e.g. "abc" or "").
        # TypeError: an object float() cannot handle at all (e.g. None),
        # which must count as "not a float" instead of aborting the caller.
        return False
    return True

def isInt(value):
    """Tell whether ``value`` can be converted with ``int()``.

    Returns True if ``int(value)`` succeeds and False otherwise. A string
    holding a decimal number such as "1.5" is not accepted by ``int()`` and
    therefore yields False.
    """
    try:
        int(value)
    except (TypeError, ValueError):
        # ValueError: a string that is not an integer literal (e.g. "1.5").
        # TypeError: an object int() cannot handle at all (e.g. None),
        # which must count as "not an int" instead of aborting the caller.
        return False
    return True

def getValidationMap(properties):
    """Build the property map that is handed to the metadata validation.

    Every key of ``properties`` is lowercased. Every value that ``isInt``
    accepts is converted to an int, otherwise every value that ``isFloat``
    accepts is converted to a float; all remaining values are passed through
    unchanged. The input map is not modified.
    """
    validation_map = {}

    for name, raw_value in properties.items():
        # Try int first so that whole numbers do not end up as floats.
        if isInt(raw_value):
            typed_value = int(raw_value)
        elif isFloat(raw_value):
            typed_value = float(raw_value)
        else:
            typed_value = raw_value

        validation_map[name.lower()] = typed_value

    return validation_map

def filterOmeroPropertyMap(property_map, filter_list):
"""Filters map before ingestion into omero server
Expand Down Expand Up @@ -317,6 +350,9 @@ def process(transaction):
# 5. Additional metadata is provided in an own metadata TSV file.
# We extract the metadata from this file.
properties = getPropertyMap(line, property_names)

# 5.1 Validate metadata for image file
ImagingMetadataValidator.validateImagingProperties(getValidationMap(properties))

#one file can have many images, iterate over all img ids
for img_id in omero_image_ids:
Expand All @@ -343,4 +379,3 @@ def process(transaction):

# 7. Last but not least we create the open science file format for images which is
# OMERO-Tiff and store it in OMERO next to the proprietary vendor format.

0 comments on commit 2e27646

Please sign in to comment.