diff --git a/.dependabot/config.yml b/.dependabot/config.yml deleted file mode 100644 index a0a1799d10..0000000000 --- a/.dependabot/config.yml +++ /dev/null @@ -1,10 +0,0 @@ -version: 2 -updates_configs: - - package_manager: "java:maven" - directory: "/" - update_schedule: "daily" - target-branch: development - default_labels: "dependabot" - commit-message: - prefix: "[DEPENDABOT]" - diff --git a/.github/workflows/java_checkstyle.yml b/.github/workflows/java_checkstyle.yml index a41426e130..67094d5ada 100644 --- a/.github/workflows/java_checkstyle.yml +++ b/.github/workflows/java_checkstyle.yml @@ -8,10 +8,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.11 + - name: Set up JDK 1.8 uses: actions/setup-java@v1 with: - java-version: 1.11 + java-version: 1.8 - name: Download Checkstyle run: wget https://github.com/checkstyle/checkstyle/releases/download/checkstyle-8.31/checkstyle-8.31-all.jar diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 452314f2b0..23fafb97b1 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -8,10 +8,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.11 + - name: Set up JDK 1.8 uses: actions/setup-java@v1 with: - java-version: 1.11 + java-version: 1.8 - name: Load local Maven repository cache uses: actions/cache@v2 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e8672e5f7d..531b9dfb67 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -88,7 +88,6 @@ This project adheres to `Semantic Versioning `_. 
**Deprecated** - 2.14.0 (2021-10-27) ------------------- diff --git a/README.md b/README.md index 2eba7ab305..d97d27b359 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,11 @@ Make sure, that you have defined the Github package Maven repository, in order f A Nanopore NGS measurement output is delivered to us as a nested folder structure, following this model: -![Nanopore Data Structure Model](./doc/figures/Nanopore_Data_Structure_Model.png) +![Nanopore Data Structure Model](./doc/figures/Nanopore_Data_Structure_Model.svg) + +A more recent model, which places two of the configuration files into a subfolder and adds the barcode alignment file, is also supported: + +![Nanopore Data Structure Model v2](./doc/figures/Nanopore_Data_Structure_Model_v2.svg) #### Nanopore usage example diff --git a/doc/figures/Nanopore_Data_Structure_Model.png b/doc/figures/Nanopore_Data_Structure_Model.png deleted file mode 100644 index 5e94d5f149..0000000000 Binary files a/doc/figures/Nanopore_Data_Structure_Model.png and /dev/null differ diff --git a/doc/figures/Nanopore_Data_Structure_Model.svg b/doc/figures/Nanopore_Data_Structure_Model.svg new file mode 100644 index 0000000000..d3ffbbd6fd --- /dev/null +++ b/doc/figures/Nanopore_Data_Structure_Model.svg @@ -0,0 +1,4 @@ + + + +
Root Folder
(OxfordNanoporeExperiment)
Root Folder...
Measurement Folder
(OxfordNanoporeMeasurement)
Measurement Folder...
1
1
1..n
1..n
FastQ Fail Folder
FastQ Fail Folder
FastQ Pass Folder
FastQ Pass Folder
Fast5 Pass Folder
Fast5 Pass Folder
Fast5 Fail Folder
Fast5 Fail Folder
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Sequencing Summary Log
Sequencing Summary Log
Duty Time Log
Duty Time Log
Final Summary Log
Final Summary Log
Throughput Log
Throughput Log
Report MD Log
Report MD Log
Report PDF Log
Report PDF Log
Drift Correction Log
Drift Correction Log
Mux Scan Data Log
Mux Scan Data Log
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
FastQ Folder
FastQ Folder
FastQ File
FastQ File
1
1
0..n
0..n
DataFile
DataFile
BarcodedFolder
BarcodedFolder
Extends
Extends
Extends
Extends
Data File
Data File
Extends
Extends
DataFolder
DataFolder
Extends
Extends
DataFolder
DataFolder
Extends
Extends
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
1
1
0..n
0..n
FastQ File
FastQ File
1
1
0..n
0..n
1
1
0..n
0..n
FastQ Folder
FastQ Folder
FastQ File
FastQ File
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
FastQ File
FastQ File
1
1
0..n
0..n
Fast5 Folder
Fast5 Folder
Fast5 File
Fast5 File
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
Fast5 File
Fast5 File
1
1
0..n
0..n
Fast5 Folder
Fast5 Folder
Fast5 File
Fast5 File
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
Fast5 File
Fast5 File
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
FastQ Folder
FastQ Folder
Fast5 Folder
Fast5 Folder
DataFolder
DataFolder
Text is not SVG - cannot display
\ No newline at end of file diff --git a/doc/figures/Nanopore_Data_Structure_Model_v2.svg b/doc/figures/Nanopore_Data_Structure_Model_v2.svg new file mode 100644 index 0000000000..330e266c8b --- /dev/null +++ b/doc/figures/Nanopore_Data_Structure_Model_v2.svg @@ -0,0 +1,4 @@ + + + +
Root Folder
(OxfordNanoporeExperiment)
Root Folder...
Measurement Folder
(OxfordNanoporeMeasurement)
Measurement Folder...
1
1
1..n
1..n
FastQ Fail Folder
FastQ Fail Folder
FastQ Pass Folder
FastQ Pass Folder
Fast5 Pass Folder
Fast5 Pass Folder
Fast5 Fail Folder
Fast5 Fail Folder
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Barcode Alignment Log
Barcode Alignment Log
Duty Time Log
Duty Time Log
Final Summary Log
Final Summary Log
Throughput Log
Throughput Log
Report MD Log
Report MD Log
Report PDF Log
Report PDF Log
Sequencing Summary Log
Sequencing Summary Log
Mux Scan Data Log
Mux Scan Data Log
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
FastQ Folder
FastQ Folder
FastQ File
FastQ File
1
1
0..n
0..n
DataFile
DataFile
BarcodedFolder
BarcodedFolder
Extends
Extends
Extends
Extends
Data File
Data File
Extends
Extends
DataFolder
DataFolder
Extends
Extends
DataFolder
DataFolder
Extends
Extends
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
1
1
0..n
0..n
FastQ File
FastQ File
1
1
0..n
0..n
1
1
0..n
0..n
FastQ Folder
FastQ Folder
FastQ File
FastQ File
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
FastQ File
FastQ File
1
1
0..n
0..n
Fast5 Folder
Fast5 Folder
Fast5 File
Fast5 File
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
Fast5 File
Fast5 File
1
1
0..n
0..n
Fast5 Folder
Fast5 Folder
Fast5 File
Fast5 File
Unclassified Folder
Unclassified Folder
1
1
0..n
0..n
Fast5 File
Fast5 File
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
1
1
0..n
0..n
FastQ Folder
FastQ Folder
Fast5 Folder
Fast5 Folder
DataFolder
DataFolder
Drift Correction Log
Drift Correction Log
Extends
Extends
1:1
1:1
1:1
1:1
Other Reports Subfolder
Other Reports Subfolder
Text is not SVG - cannot display
\ No newline at end of file diff --git a/pom.xml b/pom.xml index a6b3b75a06..15e0618835 100644 --- a/pom.xml +++ b/pom.xml @@ -81,20 +81,20 @@ org.codehaus.groovy groovy-bom - 2.5.14 + 3.0.9 pom import - - org.codehaus.groovy - groovy-all - 2.5.14 - pom - ${osgi.scope} - + + org.codehaus.groovy + groovy-all + 3.0.9 + pom + ${osgi.scope} + org.osgi osgi.core @@ -106,12 +106,12 @@ com.github.everit-org.json-schema org.everit.json.schema 1.12.2 - ${osgi.scope} + test life.qbic xml-manager-lib - 1.6.0 + 1.7.0 ${osgi.scope} @@ -144,7 +144,7 @@ org.spockframework spock-core - 2.0-groovy-2.5 + 2.0-groovy-3.0 test @@ -181,7 +181,7 @@ org.codehaus.gmavenplus gmavenplus-plugin - 1.12.1 + 1.13.1 @@ -217,12 +217,12 @@ org.apache.maven.plugins maven-site-plugin - 3.9.1 + 3.11.0 org.apache.maven.plugins maven-project-info-reports-plugin - 3.1.1 + 3.2.1 life.qbic @@ -269,7 +269,7 @@ biz.aQute.bnd bnd-maven-plugin - 5.1.2 + 6.1.0 diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy index 70a6b02c01..367fb89315 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy @@ -34,7 +34,8 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FQDN_FILES + ".ReportMdLog", FQDN_FILES + ".ReportPDFLog", FQDN_FILES + ".SequencingSummaryLog", - FQDN_FILES + ".ThroughputLog" + FQDN_FILES + ".ThroughputLog", + FQDN_FILES + ".BarcodeAlignmentLog" ] private final static Set nanoporeFolderTypes = [ @@ -45,7 +46,8 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FQDN_FOLDERS + ".FastQPassFolder", FQDN_FOLDERS + ".FastQFailFolder", FQDN_FOLDERS + ".UnclassifiedFast5Folder", - FQDN_FOLDERS + ".UnclassifiedFastQFolder" + FQDN_FOLDERS + ".UnclassifiedFastQFolder", + FQDN_FOLDERS + ".OtherReportsFolder" ] private 
OxfordNanoporeExperiment(String sampleId, List measurements) { diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy index 4825de3c2b..5d79cf489d 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy @@ -1,18 +1,14 @@ package life.qbic.datamodel.datasets +import groovy.json.JsonSlurper +import groovy.util.logging.Log4j2 import life.qbic.datamodel.datasets.datastructure.files.DataFile import life.qbic.datamodel.datasets.datastructure.folders.DataFolder import life.qbic.datamodel.datasets.datastructure.folders.nanopore.* -import org.everit.json.schema.ValidationException import java.util.regex.Matcher import java.util.regex.Pattern -import org.everit.json.schema.loader.SchemaLoader -import org.json.JSONObject -import org.json.JSONTokener -import groovy.util.logging.Log4j2 - /** * A dataset that represents a Oxford Nanopore Measurement. 
* @@ -21,23 +17,9 @@ import groovy.util.logging.Log4j2 @Log4j2 final class OxfordNanoporeMeasurement { - private static final String LIBRARY_PREP_KIT_SCHEMA = "SQK-.*(?=:)" - - private static final enum METADATA_FIELD { - ADAPTER, - ASIC_TEMP, - BASE_CALLER, - BASE_CALLER_VERSION, - DEVICE_TYPE, - FLOWCELL_ID, - FLOWCELL_POSITION, - FLOWCELL_TYPE, - LIBRARY_PREPARATION_KIT, - MACHINE_HOST, - START_DATE - } + private static final String LIBRARY_PREP_KIT_SCHEMA = "SQK-.*" - private final Map metadata + private final Metadata metadata private final Map folders @@ -50,12 +32,11 @@ final class OxfordNanoporeMeasurement { protected OxfordNanoporeMeasurement(String name, String path, List children, Map metadata) { this.logFilesCollection = new ArrayList<>() this.folders = new HashMap<>() - this.metadata = new HashMap() this.measurementFolder = MeasurementFolder.create(name, path, children) - validateMetaData(metadata) - readMetaData(metadata) + this.metadata = Metadata.from(metadata) + createContent() assessPooledStatus() assessState() @@ -69,16 +50,6 @@ final class OxfordNanoporeMeasurement { return new OxfordNanoporeMeasurement(name, path, children, metadata) } - private static void validateMetaData(Map metadata) throws IllegalArgumentException { - try { - MetaData.validateMetadata(metadata) - } catch (ValidationException e) { - // Aggregate the causing exceptions - def causes = e.getAllMessages().join("\n") - throw new IllegalArgumentException("The Nanopore metadata could not be collected.\nReason:\n$causes",) - } - } - private void assessPooledStatus() { this.pooledSamplesMeasurement = containsAtLeastOneBarcodedFolder(folders["fast5pass"]) // There can be still pooled samples in the failed folder, worst case is all @@ -97,33 +68,6 @@ final class OxfordNanoporeMeasurement { return false } - private void readMetaData(Map metadata) { - this.metadata[METADATA_FIELD.ADAPTER] = metadata["adapter"] - this.metadata[METADATA_FIELD.ASIC_TEMP] = metadata["asic_temp"] - 
this.metadata[METADATA_FIELD.BASE_CALLER] = metadata["base_caller"] - this.metadata[METADATA_FIELD.BASE_CALLER_VERSION] = metadata["base_caller_version"] - this.metadata[METADATA_FIELD.DEVICE_TYPE] = metadata["device_type"] - this.metadata[METADATA_FIELD.FLOWCELL_ID] = metadata["flow_cell_id"] - this.metadata[METADATA_FIELD.FLOWCELL_POSITION] = metadata["flow_cell_position"] - this.metadata[METADATA_FIELD.FLOWCELL_TYPE] = metadata["flow_cell_product_code"] - this.metadata[METADATA_FIELD.LIBRARY_PREPARATION_KIT] = extractLibraryKit(metadata["protocol"] ?: "") - this.metadata[METADATA_FIELD.MACHINE_HOST] = metadata["hostname"] - this.metadata[METADATA_FIELD.START_DATE] = metadata["started"] - } - - private static String extractLibraryKit(String text) { - Set result = [] - Pattern pattern = Pattern.compile(LIBRARY_PREP_KIT_SCHEMA, Pattern.CASE_INSENSITIVE) - Matcher m = pattern.matcher(text) - while (m.find()) { - result.add(m.group()) - } - if (result.isEmpty()) { - throw new MissingPropertyException("Could not find information about the library preparation kit.") - } - return result[0] - } - private void createContent() { measurementFolder.getChildren().each { element -> switch (element) { @@ -215,7 +159,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getAdapter() { - return metadata.get(METADATA_FIELD.ADAPTER) ?: "" + return metadata.getAdapter() } /** @@ -223,7 +167,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getAsicTemp() { - return metadata.get(METADATA_FIELD.ASIC_TEMP) + return metadata.getAsicTemp() } /** @@ -231,7 +175,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getDeviceType() { - return metadata.get(METADATA_FIELD.DEVICE_TYPE) + return metadata.getDeviceType() } /** @@ -239,7 +183,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getFlowcellId() { - return metadata.get(METADATA_FIELD.FLOWCELL_ID) + return metadata.getFlowcellId() } /** @@ -247,7 +191,7 @@ final class 
OxfordNanoporeMeasurement { * @return */ String getFlowCellPosition() { - return metadata.get(METADATA_FIELD.FLOWCELL_POSITION) + return metadata.getFlowcellPosition() } /** @@ -255,7 +199,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getFlowCellType() { - return metadata.get(METADATA_FIELD.FLOWCELL_TYPE) + return metadata.getFlowcellType() } /** @@ -263,7 +207,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getBaseCaller() { - return metadata.get(METADATA_FIELD.BASE_CALLER) + return metadata.getBaseCaller() } /** @@ -271,7 +215,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getBaseCallerVersion() { - return metadata.get(METADATA_FIELD.BASE_CALLER_VERSION) + return metadata.getBaseCallerVersion() } /** @@ -279,7 +223,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getLibraryPreparationKit() { - return metadata.get(METADATA_FIELD.LIBRARY_PREPARATION_KIT) + metadata.getLibraryPreparationKit() } /** @@ -287,7 +231,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getMachineHost() { - return metadata.get(METADATA_FIELD.MACHINE_HOST) + metadata.getMachineHost() } /** @@ -295,7 +239,7 @@ final class OxfordNanoporeMeasurement { * @return */ String getStartDate() { - return metadata.get(METADATA_FIELD.START_DATE) + metadata.getStartDate() } private Map prepareUnclassifiedData() { @@ -369,24 +313,124 @@ final class OxfordNanoporeMeasurement { return this.measurementFolder.relativePath } - /* - Inner class that contains the logic for the metadata validation - */ - private static class MetaData { - - private static final SCHEMA = "/schemas/ont-metadata.schema.json" - - static validateMetadata(Map metaData) throws ValidationException { - // Load schema - final def metaDataJson = new JSONObject(metaData) - final def schemaStream = OxfordNanoporeMeasurement.getResourceAsStream(SCHEMA) - final def rawSchema = new JSONObject(new JSONTokener(schemaStream)) - final def jsonSchema = 
SchemaLoader.load(rawSchema) - // Validate against schema - jsonSchema.validate(metaDataJson) + private static class Metadata { + private static final Map SCHEMA = parseMetadataSchema() + private static final String LIBRARY_PREP_KIT_SCHEMA = "SQK-.*" + + private String adapter + private String asicTemp + private String deviceType + private String flowcellId + private String flowcellPosition + private String flowcellType + private String baseCaller + private String baseCallerVersion + private String libraryPreparationKit + private String machineHost + private String startDate + + private Metadata() {} + + /** + * Reads metadata information from the provided map given the map is valid according to the schema. + * + * @param metadataMap a map containing all required metadata + * @return a valid Metadata instance containing relevant information from the map + */ + static Metadata from(Map metadataMap) { + validateMetaDataMap(metadataMap) + Metadata metadata = new Metadata() + metadata.readMetaData(metadataMap) + return metadata } - } + private static Map parseMetadataSchema() { + URL schemaUrl = this.getClassLoader().getResource("schemas/ont-metadata.schema.json") + return new JsonSlurper().parse(schemaUrl) as Map + } + + private static void validateMetaDataMap(Map metadata) throws IllegalArgumentException { + def expectedKeys = SCHEMA.get("required") as List + + def missingKeys = expectedKeys.stream() + .filter({ !metadata.keySet().contains(it) }) + .collect() + if (!missingKeys.isEmpty()) { + throw new IllegalArgumentException('Required metadata properties missing: ' + missingKeys.join(", ")) + } + } + + private static String extractLibraryKit(String text) { + // cut off optional, unused suffix + text = text.replace(":True", "") + Set result = [] + Pattern pattern = Pattern.compile(LIBRARY_PREP_KIT_SCHEMA, Pattern.CASE_INSENSITIVE) + Matcher m = pattern.matcher(text) + while (m.find()) { + result.add(m.group()) + } + if (result.isEmpty()) { + throw new 
MissingPropertyException("Could not find information about the library preparation kit.") + } + return result[0] + } + + private void readMetaData(Map metadata) { + this.adapter = metadata["adapter"] ?: "" + this.asicTemp = metadata["asic_temp"] + this.baseCaller = metadata["base_caller"] + this.baseCallerVersion = metadata["base_caller_version"] + this.deviceType = metadata["device_type"] + this.flowcellId = metadata["flow_cell_id"] + this.flowcellPosition = metadata["flow_cell_position"] + this.flowcellType = metadata["flow_cell_product_code"] + this.libraryPreparationKit = extractLibraryKit(metadata["protocol"] ?: "") + this.machineHost = metadata["hostname"] + this.startDate = metadata["started"] + } + + String getAdapter() { + return adapter + } + + String getAsicTemp() { + return asicTemp + } + + String getDeviceType() { + return deviceType + } + + String getFlowcellId() { + return flowcellId + } + + String getFlowcellPosition() { + return flowcellPosition + } + + String getFlowcellType() { + return flowcellType + } + + String getBaseCaller() { + return baseCaller + } + + String getBaseCallerVersion() { + return baseCallerVersion + } + + String getLibraryPreparationKit() { + return libraryPreparationKit + } + String getMachineHost() { + return machineHost + } + String getStartDate() { + return startDate + } + } } diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BarcodeAlignmentLog.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BarcodeAlignmentLog.groovy new file mode 100644 index 0000000000..fa583c9ba9 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BarcodeAlignmentLog.groovy @@ -0,0 +1,32 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.DataFile + +/** + * A specialisation of a DataFile, represents an Oxford Nanopore barcode alignment log file + * + */ +class 
BarcodeAlignmentLog extends DataFile { + + final private static String FILE_TYPE = "tsv" + + final private static String NAME_SCHEMA = $/barcode_alignment_.*/$ + + protected BarcodeAlignmentLog() {} + + protected BarcodeAlignmentLog(String name, String relativePath) { + super(name, relativePath, FILE_TYPE) + validateName() + } + + static BarcodeAlignmentLog create(String name, String relativePath) { + return new BarcodeAlignmentLog(name, relativePath) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore barcode alignment log name schema!") + } + } + +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/OtherReportsFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/OtherReportsFolder.groovy new file mode 100644 index 0000000000..87753a56bd --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/OtherReportsFolder.groovy @@ -0,0 +1,40 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder. + * + * Represents the "other_reports" subfolder of an Oxford Nanopore NGS measurement. + * + */ +class OtherReportsFolder extends DataFolder { + + /** + * The name schema of an Oxford Nanopore "other reports" folder. 
+ */ + final private static String NAME_SCHEMA = $/other_reports/$ + + protected OtherReportsFolder() {} + + protected OtherReportsFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of an OtherReportsFolder object + * @param relativePath The relative path of the folder + * @param children A list with child elements of unknown type of the folder + * @return A new instance of an OtherReportsFolder object + */ + static OtherReportsFolder create(String name, String relativePath, List children) { + new OtherReportsFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore other_reports folder schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputV2.groovy b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputV2.groovy new file mode 100644 index 0000000000..f615bf84b2 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputV2.groovy @@ -0,0 +1,22 @@ +package life.qbic.datamodel.instruments + + +/** + * Represents the Nanopore instrument output data structure schema. + * + * The original schema is defined as a resource and is + * referenced here, wrapped in a Groovy class for reference + * in applications that want to validate the instrument + * output structure against the schema. 
+ * + * @author Sven Fillinger + * @since 1.9.0 + */ +class OxfordNanoporeInstrumentOutputV2 { + + private static final String SCHEMA_PATH = "/schemas/nanopore-instrument-output_v2.schema.json" + + static InputStream getSchemaAsStream() { + return OxfordNanoporeInstrumentOutputV2.getResourceAsStream(SCHEMA_PATH) + } +} diff --git a/src/main/resources/schemas/nanopore-instrument-output_v2.schema.json b/src/main/resources/schemas/nanopore-instrument-output_v2.schema.json new file mode 100644 index 0000000000..452feeefea --- /dev/null +++ b/src/main/resources/schemas/nanopore-instrument-output_v2.schema.json @@ -0,0 +1,605 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "http://qbic.life/nanopore-instrument-output_v2.schema.json", + "title": "Nanopore Instrument Output V2", + "description": "Describes in which form Nanopore data is received from the lab.", + "definitions": { + "folder": { + "description": "Describes a folder", + "type": "object", + "required": [ + "name", + "path", + "children" + ], + "properties": { + "name": { + "description": "Folder name", + "type": "string", + "minLength": 1 + }, + "path": { + "description": "relative folderpath", + "type": "string", + "minLength": 1 + }, + "children": { + "description": "Describes files and/or sub-folders if existent", + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "$ref": "#/definitions/file" + } + ] + } + } + } + }, + "file": { + "description": "Describes a file", + "type": "object", + "required": [ + "name", + "path", + "file_type" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "path": { + "type": "string", + "minLength": 1 + }, + "file_type": { + "type": "string", + "minLength": 1 + } + } + }, + "qbic_code": { + "description": "Describes a QBiC code used as a prefix", + "type": "string", + "pattern": "Q\\w{4}\\d{3}[A-X][A-X0-9].*" + }, + "barcoded_folder": { + "description": "folder starting with qbic barcode 
prefix", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "$ref": "#/definitions/qbic_code" + } + } + } + ] + }, + "other_reports_folder": { + "description": "subfolder containing some of the report files", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "other_reports" + }, + "children": { + "items": { + "oneOf": [ + { + "$ref": "#/definitions/drift_correction_log" + }, + { + "$ref": "#/definitions/mux_scan_data_log" + } + ] + }, + "minItems": 2 + } + } + } + ] + }, + "fast5_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fast5" + } + } + } + ] + }, + "fastqgz_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fastq.gz" + } + } + } + ] + }, + "fastq_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fastq" + } + } + } + ] + }, + "unclassified_folder": { + "description": "folder containing unassigned read file(s)", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "unclassified" + } + } + } + ] + }, + "fast5_unclassified_folder": { + "description": "folder containing fast5 data from a pooling experiment, that could not be assigned to one of the known samples", + "allOf": [ + { + "$ref": "#/definitions/unclassified_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fast5_file" + }, + "minItems": 0 + } + } + } + ] + }, + "fastq_unclassified_folder": { + "description": "folder containing fastq and/or fastq.gz data from a pooling experiment, that could not be assigned to one of the known samples", + "allOf": [ + { + "$ref": "#/definitions/unclassified_folder" + }, + { + "properties": { + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastqgz_file" + }, + { + "$ref": 
"#/definitions/fastq_file" + } + ] + }, + "minItems": 0 + } + } + } + ] + }, + "fast5_subfolder": { + "description": "folder containing fast5 data from a single sample (only when pooling is used)", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fast5_file" + }, + "minItems": 1 + } + } + } + ] + }, + "fast5_fail": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fast5_fail" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fast5_subfolder" + }, + { + "$ref": "#/definitions/fast5_unclassified_folder" + }, + { + "$ref": "#/definitions/fast5_file" + } + ] + } + } + } + } + ] + }, + "fast5_pass": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fast5_pass" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fast5_subfolder" + }, + { + "$ref": "#/definitions/fast5_unclassified_folder" + }, + { + "$ref": "#/definitions/fast5_file" + } + ] + } + } + } + } + ] + }, + "fastq_fail": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fastq_fail" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastq_subfolder" + }, + { + "$ref": "#/definitions/fastq_unclassified_folder" + }, + { + "$ref": "#/definitions/fastqgz_file" + } + ] + } + } + } + } + ] + }, + "fastq_pass": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fastq_pass" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastq_subfolder" + }, + { + "$ref": "#/definitions/fastq_unclassified_folder" + }, + { + "$ref": "#/definitions/fastqgz_file" + } + ] + } + } + } + } + ] + }, + "fastq_subfolder": { + "description": "folder containing gzipped fastq data from a single sample (only when pooling is used)", + "allOf": 
[ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fastqgz_file" + }, + "minItems": 1 + } + } + } + ] + }, + "measurements": { + "description": "Top folder generated by the facility, containing one or more timestamped measurements", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "allOf": [ + { + "$ref": "#/definitions/measurement" + } + ] + }, + "minItems": 1 + } + } + } + ] + }, + "measurement": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "\\d{4}(0?[1-9]|1[012])(0?[1-9]|[12][0-9]|3[01])_([01][0-9]|2[0-3])([0-5][0-9]).*", + "description": "Name of measurement subfolder. Starts with date and time of measurement." + }, + "children": { + "uniqueItems": true, + "minItems": 12, + "items": { + "oneOf": [ + { + "$ref": "#/definitions/fastq_fail" + }, + { + "$ref": "#/definitions/fastq_pass" + }, + { + "$ref": "#/definitions/fast5_pass" + }, + { + "$ref": "#/definitions/fast5_fail" + }, + { + "$ref": "#/definitions/duty_time_log" + }, + { + "$ref": "#/definitions/barcode_alignment_log" + }, + { + "$ref": "#/definitions/final_summary_log" + }, + { + "$ref": "#/definitions/report_md_log" + }, + { + "$ref": "#/definitions/report_pdf_log" + }, + { + "$ref": "#/definitions/sequencing_summary_log" + }, + { + "$ref": "#/definitions/throughput_log" + }, + { + "$ref": "#/definitions/other_reports_folder" + } + ] + } + } + } + } + ] + }, + "drift_correction_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "drift_correction_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + }, + "duty_time_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "duty_time_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + }, + "barcode_alignment_log": { + "allOf": 
[ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "barcode_alignment_.*" + }, + "file_type": { + "pattern": "tsv" + } + } + } + ] + }, + "final_summary_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "final_summary_.*" + }, + "file_type": { + "pattern": "txt" + } + } + } + ] + }, + "mux_scan_data_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "mux_scan_data_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + }, + "report_md_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "report_.*" + }, + "file_type": { + "pattern": "md" + } + } + } + ] + }, + "report_pdf_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "report_.*" + }, + "file_type": { + "pattern": "pdf" + } + } + } + ] + }, + "sequencing_summary_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "sequencing_summary_.*" + }, + "file_type": { + "pattern": "txt" + } + } + } + ] + }, + "throughput_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "throughput_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + } + }, + "allOf": [ + { + "$ref": "#/definitions/measurements" + } + ] +} diff --git a/src/main/resources/schemas/ont-metadata.schema.json b/src/main/resources/schemas/ont-metadata.schema.json index 98101d2149..6294d002e9 100644 --- a/src/main/resources/schemas/ont-metadata.schema.json +++ b/src/main/resources/schemas/ont-metadata.schema.json @@ -43,7 +43,7 @@ }, "protocol": { "type": "string", - "pattern": "SQK-.*(?=:)" + "pattern": "SQK-.*" }, "started": { "type": "string", diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy 
b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy index 3be25dd3df..7a404849b7 100644 --- a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy @@ -17,6 +17,12 @@ class OxfordNanoporeExperimentSpec extends Specification { */ @Shared Map minimalWorkingSimpleDataStructure + /** + * Newer map that stores the Oxford Nanopore folder structure + * according to the schema that puts some reports in its own folder and adds a new report + */ + @Shared + Map minimalWorkingSimpleDataStructureWithReportsFolder /** * Map that that stores the Oxford Nanopore folder structure * according to the schema containing unclassified read information @@ -33,6 +39,9 @@ class OxfordNanoporeExperimentSpec extends Specification { def setupSpec() { InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream("valid-example.json") minimalWorkingSimpleDataStructure = (Map) new JsonSlurper().parse(stream) + // new example with slightly different structure + stream = Thread.currentThread().getContextClassLoader().getResourceAsStream("valid-example-newer.json") + minimalWorkingSimpleDataStructureWithReportsFolder = (Map) new JsonSlurper().parse(stream) // read in unclassified example stream = Thread.currentThread().getContextClassLoader().getResourceAsStream("valid-example-unclassified.json") unclassifiedWorkingDataStructure = (Map) new JsonSlurper().parse(stream) @@ -53,6 +62,21 @@ class OxfordNanoporeExperimentSpec extends Specification { then: assert experiment.sampleCode == "QABCD001AB" assert measurements.size() == 1 + assert measurements[0].libraryPreparationKit == "SQK-LSK109" + } + + def "Create simple sample Oxford Nanopore experiment successfully for newer structure"() { + given: + final def example = minimalWorkingSimpleDataStructureWithReportsFolder + + when: + final def 
experiment = OxfordNanoporeExperiment.create(example) + final def measurements = experiment.getMeasurements() + + then: + assert experiment.sampleCode == "QABCD001AB" + assert measurements.size() == 1 + assert measurements[0].libraryPreparationKit == "SQK-LSK109-XL" } def "Create a simple pooled Oxford Nanopore experiment successfully"() { diff --git a/src/test/resources/valid-example-newer.json b/src/test/resources/valid-example-newer.json new file mode 100644 index 0000000000..346a0ef617 --- /dev/null +++ b/src/test/resources/valid-example-newer.json @@ -0,0 +1,200 @@ +{ + "name": "QABCD001AB_E12A345a01_PAE12345", + "path": "./", + "children": [ + { + "name": "20200122_1217_1-A1-B1-PAE12345_1234567a", + "metadata": { + "adapter": "flongle", + "asic_temp": "32.631687", + "base_caller": "Guppy", + "base_caller_version": "3.2.8+bd67289", + "device_type" : "promethion", + "flow_cell_id": "PAE26306", + "flow_cell_product_code": "FLO-PRO002", + "flow_cell_position": "2-A3-D3", + "hostname": "PCT0094", + "protocol": "protocol=sequencing/sequencing_PRO002_DNA:FLO-PRO002:SQK-LSK109-XL", + "started": "2020-02-11T15:52:10.465982+01:00" + }, + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a", + "children": [ + { + "name": "throughput_.csv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/throughput_.csv", + "file_type": "csv" + }, + { + "name": "report_.md", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/report_.md", + "file_type": "md" + }, + { + "name": "final_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/final_summary_.txt", + "file_type": "txt" + }, + { + "name": "barcode_alignment_.tsv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/barcode_alignment_.tsv", + "file_type": "tsv" + }, + { + "name": "other_reports", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/other_reports", + "children": [ + { + "name": "mux_scan_data.csv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/other_reports/mux_scan_data.csv", + 
"file_type": "csv" + }, + { + "name": "drift_correction_.csv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/other_reports/drift_correction_.csv", + "file_type": "csv" + } + ] + }, + { + "name": "fastq_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile1.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile1.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "fastq_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "duty_time_.csv", + "path": 
"./20200122_1217_1-A1-B1-PAE12345_1234567a/duty_time_.csv", + "file_type": "csv" + }, + { + "name": "sequencing_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/sequencing_summary_.txt", + "file_type": "txt" + }, + { + "name": "report_test.pdf", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/report_test.pdf", + "file_type": "pdf" + }, + { + "name": "fast5_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/", + "children": [ + { + "name": "myfile2.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile2.fast5", + "file_type": "fast5" + }, + { + "name": "myfile4.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile4.fast5", + "file_type": "fast5" + }, + { + "name": "myfile3.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile3.fast5", + "file_type": "fast5" + }, + { + "name": "myfile5.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile5.fast5", + "file_type": "fast5" + }, + { + "name": "myfile.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile.fast5", + "file_type": "fast5" + } + ] + }, + { + "name": "fast5_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/", + "children": [ + { + "name": "myfile2.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile2.fast5", + "file_type": "fast5" + }, + { + "name": "myfile4.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile4.fast5", + "file_type": "fast5" + }, + { + "name": "myfile3.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile3.fast5", + "file_type": "fast5" + }, + { + "name": "myfile5.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile5.fast5", + "file_type": "fast5" + }, + { + "name": "myfile.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile.fast5", + "file_type": "fast5" + } + ] + } + ] + } + ] +}