diff --git a/README.md b/README.md index d97d27b359..752a60c58c 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,10 @@ A more recent model, which places two of the configuration files into a subfolde ![Nanopore Data Structure Model v2](./doc/figures/Nanopore_Data_Structure_Model_v2.svg) +V4 outlines a model in which a second higher-accuracy basecalling was performed after the initial basecalling + +![Nanopore Data Structure Model v4](./doc/figures/Nanopore_Data_Structure_Model_v4.svg) + #### Nanopore usage example For usage examples, see the [usage documentation](./doc/examples.md). diff --git a/doc/figures/Nanopore_Data_Structure_Model_v4.svg b/doc/figures/Nanopore_Data_Structure_Model_v4.svg new file mode 100644 index 0000000000..4c1602816e --- /dev/null +++ b/doc/figures/Nanopore_Data_Structure_Model_v4.svg @@ -0,0 +1,4 @@ + + + +Root Folder(OxfordNanoporeExperiment)Root Folder...Measurement Folder(OxfordNanoporeMeasurement)Measurement Folder...111..n1..nFastQ Fail FolderFastQ Fail FolderFastQ Pass FolderFastQ Pass FolderFast5 Pass FolderFast5 Pass FolderFast5 Fail FolderFast5 Fail Folder1111111111111111Sequencing Summary LogSequencing Summary LogDuty Time LogDuty Time LogFinal Summary LogFinal Summary Log Throughput LogThroughput LogReport MD LogReport MD LogReport PDF LogReport PDF LogDrift Correction LogDrift Correction LogMux Scan Data LogMux Scan Data Log11111111111111111111111111111111FastQ FolderFastQ FolderFastQ FileFastQ File0..n0..nDataFileDataFileBarcodedFolderBarcodedFolderExtendsExtendsExtendsExtendsData FileData FileExtendsExtendsDataFolderDataFolderExtendsExtendsDataFolderDataFolderExtendsExtendsUnclassified FolderUnclassified Folder110..n0..n0..n0..nFastQ FileFastQ File110..n0..n110..n0..nFastQ FolderFastQ FolderFastQ FileFastQ FileUnclassified FolderUnclassified Folder110..n0..nFastQ FileFastQ File110..n0..nFast5 FolderFast5 FolderFast5 FileFast5 FileUnclassified FolderUnclassified Folder110..n0..nFast5 FileFast5 File110..n0..nFast5 
FolderFast5 FolderFast5 FileFast5 FileUnclassified FolderUnclassified Folder110..n0..nFast5 FileFast5 File110..n0..n0..n0..n0..n0..n110..n0..n110..n0..n0..n0..n110..n0..n0..n0..n0..n0..nFastQ FolderFastQ FolderFast5 FolderFast5 FolderDataFolderDataFolderSequencing Summary LogSequencing Summary LogSequencing Telemetry LogSequencing Telemetry LogGuppy Basecalling Client LogGuppy Basecalling...Fastq Fail FolderFastq Fail Folder1111111111Data FileData FileExtendsExtendsFastQ FolderFastQ FolderUnclassified FolderUnclassified FolderFastQ FileFastQ FileFastQ FileFastQ File110..n0..n110..n0..n0..n0..n0..n0..n110..n0..nFastq Pass FolderFastq Pass FolderFastQ FolderFastQ FolderUnclassified FolderUnclassified FolderFastQ FileFastQ FileFastQ FileFastQ File110..n0..n110..n0..n110..n0..n0..n0..n110..n0..nBasecallingBasecallingDataFolderDataFolderExtendsExtends11Text is not SVG - cannot display \ No newline at end of file diff --git a/pom.xml b/pom.xml index 54d10a8cee..9996e7e362 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ life.qbic data-model-lib - 2.23.0-SNAPSHOT + 2.24.0-SNAPSHOT data-model-lib http://github.com/qbicsoftware/data-model-lib Data models. A collection of QBiC's central data models and DTOs. 
diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy index e4f0146f93..057834dc29 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy @@ -40,7 +40,9 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FQDN_FILES + ".BarcodeAlignmentLog", FQDN_FILES + ".PoreActivityLog", FQDN_FILES + ".SampleSheetLog", - FQDN_FILES + ".PoreScanDataLog" + FQDN_FILES + ".PoreScanDataLog", + FQDN_FILES + ".SequencingTelemetryLog", + FQDN_FILES + ".GuppyBasecallLog" ] private final static Set nanoporeFolderTypes = [ @@ -52,7 +54,8 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FQDN_FOLDERS + ".FastQFailFolder", FQDN_FOLDERS + ".UnclassifiedFast5Folder", FQDN_FOLDERS + ".UnclassifiedFastQFolder", - FQDN_FOLDERS + ".OtherReportsFolder" + FQDN_FOLDERS + ".OtherReportsFolder", + FQDN_FOLDERS + ".BasecallingFolder" ] private OxfordNanoporeExperiment(String sampleId, List measurements) { diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/GuppyBasecallLog.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/GuppyBasecallLog.groovy new file mode 100644 index 0000000000..c749b0564c --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/GuppyBasecallLog.groovy @@ -0,0 +1,30 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.DataFile + +/** + * A specialisation of a DataFile, represents an Oxford Nanopore guppy basecalling client log file + */ +class GuppyBasecallLog extends DataFile { + + final private static String FILE_TYPE = "log" + + final private static String NAME_SCHEMA = $/guppy_basecall_client_log-.*/$ + + protected GuppyBasecallLog() 
{} + + protected GuppyBasecallLog(String name, String relativePath) { + super(name, relativePath, FILE_TYPE) + validateName() + } + + static GuppyBasecallLog create(String name, String relativePath) { + return new GuppyBasecallLog(name, relativePath) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore guppy basecall client log schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/SequencingTelemetryLog.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/SequencingTelemetryLog.groovy new file mode 100644 index 0000000000..2ae6d07cf5 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/SequencingTelemetryLog.groovy @@ -0,0 +1,32 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.DataFile + +/** + * A specialisation of a DataFile, represents an Oxford Nanopore sequencing telemetry log file + * + */ +class SequencingTelemetryLog extends DataFile { + + final private static String FILE_TYPE = "js" + + final private static String NAME_SCHEMA = $/sequencing_telemetry_.*/$ + + protected SequencingTelemetryLog() {} + + protected SequencingTelemetryLog(String name, String relativePath) { + super(name, relativePath, FILE_TYPE) + validateName() + } + + static SequencingTelemetryLog create(String name, String relativePath) { + return new SequencingTelemetryLog(name, relativePath) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore sequencing telemetry log name schema!") + } + } + +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BasecallingFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BasecallingFolder.groovy new 
file mode 100644 index 0000000000..eba6e31afa --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BasecallingFolder.groovy @@ -0,0 +1,42 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * + * + * + * + * @since + * + */ +class BasecallingFolder extends DataFolder { + /** + * The name schema of a basecalling folder contained within the nanopore dataset. + * + */ + final private static String NAME_SCHEMA = /basecalling/ + + protected BasecallingFolder() {} + + protected BasecallingFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a BasecallingFolder object + * @param relativePath The relative path of the folder + * @param children A list with child elements of unknown type of the folder + * @return A new instance of a BasecallingFolder object + */ + static BasecallingFolder create(String name, String relativePath, List> children) { + new BasecallingFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore Basecalling schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputV4.groovy b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputV4.groovy new file mode 100644 index 0000000000..14f4d5050e --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputV4.groovy @@ -0,0 +1,22 @@ +package life.qbic.datamodel.instruments + + +/** + * Represents the Nanopore instrument output data structure schema. 
+ * + * The original schema is defined as a resource and is + * referenced here, wrapped in a Groovy class for reference + * in applications that want to validate the instrument + * output structure against the schema. + * + * @author Steffen Greiner + * @since 1.9.0 + */ +class OxfordNanoporeInstrumentOutputV4 { + + private static final String SCHEMA_PATH = "/schemas/nanopore-instrument-output_v4.schema.json" + + static InputStream getSchemaAsStream() { + return OxfordNanoporeInstrumentOutputV4.getResourceAsStream(SCHEMA_PATH) + } +} diff --git a/src/main/resources/schemas/nanopore-instrument-output.schema.json b/src/main/resources/schemas/nanopore-instrument-output.schema.json index 47a6be3be5..3071d093a0 100644 --- a/src/main/resources/schemas/nanopore-instrument-output.schema.json +++ b/src/main/resources/schemas/nanopore-instrument-output.schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "http://qbic.life/nanopore-instrument-output.schema.json", "title": "Nanopore Instrument Output", - "description": "Describes in which form Nanopore data is received from the lab.", + "description": "Describes in which form PromethION/MinION sequenced Nanopore data is received from the Microbiology lab.", "definitions": { "folder": { "description": "Describes a folder", diff --git a/src/main/resources/schemas/nanopore-instrument-output_v2.schema.json b/src/main/resources/schemas/nanopore-instrument-output_v2.schema.json index 452feeefea..39256876c7 100644 --- a/src/main/resources/schemas/nanopore-instrument-output_v2.schema.json +++ b/src/main/resources/schemas/nanopore-instrument-output_v2.schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "http://qbic.life/nanopore-instrument-output_v2.schema.json", "title": "Nanopore Instrument Output V2", - "description": "Describes in which form Nanopore data is received from the lab.", + "description": "Describes in which form PromethION/MinION sequenced
Nanopore data is received from the medical genetics lab. Accounts for 'other reports' folder created by the lab", "definitions": { "folder": { "description": "Describes a folder", diff --git a/src/main/resources/schemas/nanopore-instrument-output_v3.schema.json b/src/main/resources/schemas/nanopore-instrument-output_v3.schema.json index 456bb037dd..3c656cc524 100644 --- a/src/main/resources/schemas/nanopore-instrument-output_v3.schema.json +++ b/src/main/resources/schemas/nanopore-instrument-output_v3.schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "http://qbic.life/nanopore-instrument-output_v3.schema.json", "title": "Nanopore Instrument Output V3", - "description": "Describes in which form Nanopore data is received from the lab.", + "description": "Describes in which form PromethION/MinION sequenced Nanopore data is received from the medical genetics lab. Accounts for the adapted 'other_reports' folder structure provided by the lab", "definitions": { "folder": { "description": "Describes a folder", diff --git a/src/main/resources/schemas/nanopore-instrument-output_v4.schema.json b/src/main/resources/schemas/nanopore-instrument-output_v4.schema.json new file mode 100644 index 0000000000..0c22e107a9 --- /dev/null +++ b/src/main/resources/schemas/nanopore-instrument-output_v4.schema.json @@ -0,0 +1,635 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "http://qbic.life/nanopore-instrument-output_v4.schema.json", + "title": "Nanopore Instrument Output V4", + "description": "Describes in which form PromethION/MinION sequenced Nanopore data is received from the microbiology lab.
For this dataset a second basecalling with higher accuracy was performed after an initial fast basecalling during sequencing", + "definitions": { + "folder": { + "description": "Describes a folder", + "type": "object", + "required": [ + "name", + "path", + "children" + ], + "properties": { + "name": { + "description": "Folder name", + "type": "string", + "minLength": 1 + }, + "path": { + "description": "relative folderpath", + "type": "string", + "minLength": 1 + }, + "children": { + "description": "Describes files and/or sub-folders if existent", + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "$ref": "#/definitions/file" + } + ] + } + } + } + }, + "file": { + "description": "Describes a file", + "type": "object", + "required": [ + "name", + "path", + "file_type" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "path": { + "type": "string", + "minLength": 1 + }, + "file_type": { + "type": "string", + "minLength": 1 + } + } + }, + "qbic_code": { + "description": "Describes a QBiC code used as a prefix", + "type": "string", + "pattern": "Q\\w{4}\\d{3}[A-X][A-X0-9].*" + }, + "barcoded_folder": { + "description": "folder starting with qbic barcode prefix", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "$ref": "#/definitions/qbic_code" + } + } + } + ] + }, + "fast5_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fast5" + } + } + } + ] + }, + "fastqgz_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fastq.gz" + } + } + } + ] + }, + "fastq_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fastq" + } + } + } + ] + }, + "unclassified_folder": { + "description": "folder containing unassigned read file(s)", + "allOf": [ + { + "$ref": "#/definitions/folder" +
}, + { + "properties": { + "name": { + "pattern": "unclassified" + } + } + } + ] + }, + "fast5_unclassified_folder": { + "description": "folder containing fast5 data from a pooling experiment, that could not be assigned to one of the known samples", + "allOf": [ + { + "$ref": "#/definitions/unclassified_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fast5_file" + }, + "minItems": 0 + } + } + } + ] + }, + "fastq_unclassified_folder": { + "description": "folder containing fastq and/or fastq.gz data from a pooling experiment, that could not be assigned to one of the known samples", + "allOf": [ + { + "$ref": "#/definitions/unclassified_folder" + }, + { + "properties": { + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastqgz_file" + }, + { + "$ref": "#/definitions/fastq_file" + } + ] + }, + "minItems": 0 + } + } + } + ] + }, + "fast5_subfolder": { + "description": "folder containing fast5 data from a single sample (only when pooling is used)", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fast5_file" + }, + "minItems": 1 + } + } + } + ] + }, + "fast5_fail": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fast5_fail" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fast5_subfolder" + }, + { + "$ref": "#/definitions/fast5_unclassified_folder" + }, + { + "$ref": "#/definitions/fast5_file" + } + ] + } + } + } + } + ] + }, + "fast5_pass": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fast5_pass" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fast5_subfolder" + }, + { + "$ref": "#/definitions/fast5_unclassified_folder" + }, + { + "$ref": "#/definitions/fast5_file" + } + ] + } + } + } + } + ] + }, + "fastq_fail": { + "allOf": [ + { + "$ref": 
"#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fastq_fail" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastq_subfolder" + }, + { + "$ref": "#/definitions/fastq_unclassified_folder" + }, + { + "$ref": "#/definitions/fastqgz_file" + } + ] + } + } + } + } + ] + }, + "fastq_pass": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fastq_pass" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastq_subfolder" + }, + { + "$ref": "#/definitions/fastq_unclassified_folder" + }, + { + "$ref": "#/definitions/fastqgz_file" + } + ] + } + } + } + } + ] + }, + "fastq_subfolder": { + "description": "folder containing gzipped fastq data from a single sample (only when pooling is used)", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fastqgz_file" + }, + "minItems": 1 + } + } + } + ] + }, + "basecalling_folder": { + "description": "folder containing the files resulting from a second high accuracy basecalling performed after the initial sequencing", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "basecalling" + }, + "children": { + "items": { + "uniqueItems": true, + "minItems": 5, + "anyOf": [ + { + "$ref": "#/definitions/fastq_pass" + }, + { + "$ref": "#/definitions/fastq_fail" + }, + { + "$ref": "#/definitions/sequencing_summary_log" + }, + { + "$ref": "#/definitions/sequencing_telemetry" + }, + { + "$ref": "#/definitions/guppy_basecall_client_log" + } + ] + } + } + } + } + ] + }, + "measurements": { + "description": "Top folder generated by the facility, containing one or more timestamped measurements", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "allOf": [ + { + "$ref": "#/definitions/measurement" + } + ] + }, + "minItems": 
1 + } + } + } + ] + }, + "measurement": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "\\d{4}(0?[1-9]|1[012])(0?[1-9]|[12][0-9]|3[01])_([01][0-9]|2[0-3])([0-5][0-9]).*", + "description": "Name of measurement subfolder. Starts with date and time of measurement." + }, + "children": { + "uniqueItems": true, + "minItems": 11, + "items": { + "oneOf": [ + { + "$ref": "#/definitions/fastq_fail" + }, + { + "$ref": "#/definitions/fastq_pass" + }, + { + "$ref": "#/definitions/fast5_pass" + }, + { + "$ref": "#/definitions/fast5_fail" + }, + { + "$ref": "#/definitions/drift_correction_log" + }, + { + "$ref": "#/definitions/duty_time_log" + }, + { + "$ref": "#/definitions/final_summary_log" + }, + { + "$ref": "#/definitions/mux_scan_data_log" + }, + { + "$ref": "#/definitions/report_md_log" + }, + { + "$ref": "#/definitions/report_html_log" + }, + { + "$ref": "#/definitions/sequencing_summary_log" + }, + { + "$ref": "#/definitions/throughput_log" + }, + { + "$ref": "#/definitions/basecalling_folder" + } + ] + } + } + } + } + ] + }, + "drift_correction_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "drift_correction_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + }, + "duty_time_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "duty_time_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + }, + "final_summary_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "final_summary_.*" + }, + "file_type": { + "pattern": "txt" + } + } + } + ] + }, + "mux_scan_data_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "mux_scan_data_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + }, + "report_md_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + 
"properties": { + "name": { + "pattern": "report_.*" + }, + "file_type": { + "pattern": "md" + } + } + } + ] + }, + "report_html_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "report_.*" + }, + "file_type": { + "pattern": "html" + } + } + } + ] + }, + "sequencing_summary_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "sequencing_summary_.*" + }, + "file_type": { + "pattern": "txt" + } + } + } + ] + }, + "throughput_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "throughput_.*" + }, + "file_type": { + "pattern": "csv" + } + } + } + ] + }, + "sequencing_telemetry": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "sequencing_telemetry_.*" + }, + "file_type": { + "pattern": "js" + } + } + } + ] + }, + "guppy_basecall_client_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "guppy_basecall_client_log-.*" + }, + "file_type": { + "pattern": "log" + } + } + } + ] + } + }, + "allOf": [ + { + "$ref": "#/definitions/measurements" + } + ] +} diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy index ebb263fc11..391aa0b0c2 100644 --- a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy @@ -35,6 +35,12 @@ class OxfordNanoporeExperimentSpec extends Specification { */ @Shared Map extendedDataStructureWithReportsFolderV3 + + /** + * Addition to the newest structure, containing a second basecalling run + */ + @Shared + Map extendedDataStructureWithReportsFolderV4 /** * Map that that stores the Oxford 
Nanopore folder structure * according to the schema containing unclassified read information @@ -61,6 +67,9 @@ class OxfordNanoporeExperimentSpec extends Specification { // latest example with slightly different structure stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-example-v3.json") extendedDataStructureWithReportsFolderV3 = (Map) new JsonSlurper().parse(stream) + // nanopore structure containing a second basecalling folder + stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-example-v4-with-basecalling.json") + extendedDataStructureWithReportsFolderV4 = (Map) new JsonSlurper().parse(stream) // read in unclassified example stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-example-unclassified.json") unclassifiedWorkingDataStructure = (Map) new JsonSlurper().parse(stream) @@ -126,6 +135,20 @@ class OxfordNanoporeExperimentSpec extends Specification { assert measurements[0].asicTemp == "32.631687" } + def "Create sample Oxford Nanopore experiment successfully for structure with second basecalling"() { + given: + final def example = extendedDataStructureWithReportsFolderV4 + + when: + final def experiment = OxfordNanoporeExperiment.create(example) + final def measurements = experiment.getMeasurements() + + then: + assert experiment.sampleCode == "QABCD001AB" + assert measurements.size() == 1 + assert measurements[0].asicTemp == "32.631687" + } + def "Create a simple pooled Oxford Nanopore experiment successfully"() { given: final def example = minimalWorkingPooledDataStructure diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/GuppyBasecallLogSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/GuppyBasecallLogSpec.groovy new file mode 100644 index 0000000000..27b5722952 --- /dev/null +++ 
b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/GuppyBasecallLogSpec.groovy @@ -0,0 +1,37 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import spock.lang.Specification + +/** + * + * + */ +class GuppyBasecallLogSpec extends Specification { + + def "shall create a GuppyBasecallingLog instance"() { + given: + final name = "guppy_basecall_client_log-.log" + final relativePath = "root/basecalling/guppy_basecall_client_log-.log" + + when: + def dataObject = GuppyBasecallLog.create(name, relativePath) + + then: + assert dataObject instanceof GuppyBasecallLog + assert dataObject.relativePath == relativePath + assert dataObject.name == name + } + + def "name not matching schema shall throw IllegalArgumentException"() { + given: + final name = "guppy_basecall.log" + final relativePath = "root/basecalling/guppy_basecall.log" + + when: + def dataObject = GuppyBasecallLog.create(name, relativePath) + + then: + thrown(IllegalArgumentException) + } + +} diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/SequencingTelemetryLogSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/SequencingTelemetryLogSpec.groovy new file mode 100644 index 0000000000..d80b92861c --- /dev/null +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/SequencingTelemetryLogSpec.groovy @@ -0,0 +1,37 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import spock.lang.Specification + +/** + * + * + */ +class SequencingTelemetryLogSpec extends Specification { + + def "shall create a SequencingTelemetryLog instance"() { + given: + final name = "sequencing_telemetry_.js" + final relativePath = "root/basecalling/sequencing_telemetry_.js" + + when: + def dataObject = SequencingTelemetryLog.create(name, relativePath) + + then: + assert dataObject instanceof SequencingTelemetryLog + assert dataObject.relativePath == relativePath + assert 
dataObject.name == name + } + + def "name not matching schema shall throw IllegalArgumentException"() { + given: + final name = "telemetry.log" + final relativePath = "root/basecalling/telemetry.log" + + when: + def dataObject = SequencingTelemetryLog.create(name, relativePath) + + then: + thrown(IllegalArgumentException) + } + +} diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BasecallingFolderSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BasecallingFolderSpec.groovy new file mode 100644 index 0000000000..074c073b28 --- /dev/null +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BasecallingFolderSpec.groovy @@ -0,0 +1,43 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.DataFile +import life.qbic.datamodel.datasets.datastructure.files.nanopore.GuppyBasecallLog +import spock.lang.Specification + +/** + * + */ +class BasecallingFolderSpec extends Specification { + + def "create basecalling folder"() { + given: + final def name = "basecalling" + final def relativePath = "root/basecalling" + final def children = [] + final def datafile = GuppyBasecallLog.create("guppy_basecall_client_log-.log", "root/basecalling/guppy_basecall_client_log-.log") + children.add(datafile) + + when: + final def dataFolder = BasecallingFolder.create(name, relativePath, children) + + then: + assert dataFolder.getChildren().get(0) instanceof DataFile + + } + + def "naming schema violation should raise an IllegalArgumentException"() { + given: + final def name = "basedcall" + final def relativePath = "root/basedcall" + final def children = [] + final def datafile = GuppyBasecallLog.create("guppy_basecall_client_log-.log", "root/basedcall/guppy_basecall_client_log-.log") + children.add(datafile) + + when: + final def dataFolder = BasecallingFolder.create(name, relativePath, children) + + then: + 
thrown(IllegalArgumentException) + + } +} diff --git a/src/test/resources/nanopore/valid-example-v4-with-basecalling.json b/src/test/resources/nanopore/valid-example-v4-with-basecalling.json new file mode 100644 index 0000000000..2471b09b2c --- /dev/null +++ b/src/test/resources/nanopore/valid-example-v4-with-basecalling.json @@ -0,0 +1,267 @@ +{ + "name": "QABCD001AB_E12A345a01_PAE12345", + "path": "./", + "children": [ + { + "name": "20200122_1217_1-A1-B1-PAE12345_1234567a", + "metadata": { + "adapter": "flongle", + "asic_temp": "32.631687", + "base_caller": "Guppy", + "base_caller_version": "3.2.8+bd67289", + "device_type": "promethion", + "flow_cell_id": "PAE26306", + "flow_cell_product_code": "FLO-PRO002", + "flow_cell_position": "2-A3-D3", + "hostname": "PCT0094", + "protocol": "sequencing/sequencing_PRO002_DNA:FLO-PRO002:SQK-LSK109:True", + "started": "2020-02-11T15:52:10.465982+01:00" + }, + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a", + "children": [ + { + "name": "throughput_.csv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/throughput_.csv", + "file_type": "csv" + }, + { + "name": "report_.md", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/report_.md", + "file_type": "md" + }, + { + "name": "final_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/final_summary_.txt", + "file_type": "txt" + }, + { + "name": "fastq_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": 
"./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile1.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_pass/myfile1.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "fastq_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fastq_fail/myfile.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "duty_time_.csv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/duty_time_.csv", + "file_type": "csv" + }, + { + "name": "sequencing_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/sequencing_summary_.txt", + "file_type": "txt" + }, + { + "name": "mux_scan_data.csv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/mux_scan_data.csv", + "file_type": "csv" + }, + { + "name": "drift_correction_.csv", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/drift_correction_.csv", + "file_type": "csv" + }, + { + "name": "fast5_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/", + "children": [ + { + "name": "myfile2.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile2.fast5", + "file_type": "fast5" + }, + { + "name": "myfile4.fast5", + "path": 
"./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile4.fast5", + "file_type": "fast5" + }, + { + "name": "myfile3.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile3.fast5", + "file_type": "fast5" + }, + { + "name": "myfile5.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile5.fast5", + "file_type": "fast5" + }, + { + "name": "myfile.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_fail/myfile.fast5", + "file_type": "fast5" + } + ] + }, + { + "name": "fast5_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/", + "children": [ + { + "name": "myfile2.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile2.fast5", + "file_type": "fast5" + }, + { + "name": "myfile4.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile4.fast5", + "file_type": "fast5" + }, + { + "name": "myfile3.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile3.fast5", + "file_type": "fast5" + }, + { + "name": "myfile5.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile5.fast5", + "file_type": "fast5" + }, + { + "name": "myfile.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_pass/myfile.fast5", + "file_type": "fast5" + } + ] + }, + { + "name": "basecalling", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/", + "children": [ + { + "name": "sequencing_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/sequencing_summary_.txt", + "file_type": "txt" + }, + { + "name": "sequencing_telemetry_.js", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/sequencing_telemetry_.js", + "file_type": "js" + }, + { + "name": "guppy_basecall_client_log-1234-56-78_90.log", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/guppy_basecall_client_log-1234-56-78_90.log", + "file_type": "log" + }, + { + "name": "fastq_pass", + "path": 
"./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "fastq_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile.fastq.gz", + "file_type": "fastq.gz" + } + ] + } + ] + } + ] + } + ] +}