diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a5663c4..f10df448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +# 1.11.0 2022-05-09 + +* Nanopore data: allows new result structure with barcode alignment file and 'other reports' folder (requires core-utils-lib 1.11.0 or later) +* Nanopore data: introduces a map of log files that are not registered, depending on the datamover origin +* Fix for nanopore: data without unclassified reads no longer leads to an error upon registration +* Ptx mass spec data: changes the msconvert server + # 1.10.0 2021-07-26 * Provides new ETL for MTB project data that are not supposed to be stored in QUK17 [(#89)](https://github.com/qbicsoftware/etl-scripts/pull/89) diff --git a/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py b/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py index 13dd049f..7dff3b87 100644 --- a/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py +++ b/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py @@ -67,7 +67,7 @@ DROPBOX_PATH = "/mnt/DSS1/openbis_dss/QBiC-convert-register-ms-vendor-format/" VENDOR_FORMAT_EXTENSIONS = {'.raw':'RAW_THERMO', '.d':'D_BRUKER'}#,'.wiff':'WIFF_SCIEX'} WATERS_FORMAT = "RAW_WATERS" -MSCONVERT_HOST = "qmsconvert.am10.uni-tuebingen.de" +MSCONVERT_HOST = "qmsconvert2.am10.uni-tuebingen.de" MSCONVERT_USER = "qbic" REMOTE_BASE = "/cygdrive/d/etl-convert" CONVERSION_TIMEOUT = 7200 @@ -971,4 +971,4 @@ def process(transaction): for f in os.listdir(incomingPath): if ".testorig" in f: - os.remove(os.path.realpath(os.path.join(incomingPath, f))) \ No newline at end of file + os.remove(os.path.realpath(os.path.join(incomingPath, f))) diff --git a/drop-boxes/register-nanopore-dropbox/register-nanopore.py b/drop-boxes/register-nanopore-dropbox/register-nanopore.py index 059c4fdd..5b0aca8c 100644 --- a/drop-boxes/register-nanopore-dropbox/register-nanopore.py +++ b/drop-boxes/register-nanopore-dropbox/register-nanopore.py @@ 
-58,6 +58,9 @@ usedExperimentIdentifiers = set() checksumMap = {} +# facilities that want us to remove certain log files +blacklistedByFacility = {"qeana03-imgagdna": ["DutyTimeLog", "FinalSummaryLog", "ReportMdLog", "ReportPDFLog", "ThroughputLog", "DriftCorrectionLog", "MuxScanDataLog"]} + def createNewSample(transaction, space, parentSampleCode): run = 0 sampleExists = True @@ -100,15 +103,25 @@ def getTimeStamp(): ts = str(now.minute)+str(now.second)+str(now.microsecond) return ts +# copies a single file from the given folder to the target folder (copied once; a second identical copy would only repeat the I/O) +def copyFileTo(file, filePath, targetFolderPath): + sourcePath = os.path.join(filePath, file.getName()) + shutil.copy2(sourcePath, targetFolderPath) + # copies log files from a folder that may contain other files to another path -def copyLogFilesTo(logFiles, filePath, targetFolderPath): +# log files that are blacklisted are not copied and thus not registered (after metadata extraction) +def copyLogFilesTo(logFiles, filePath, targetFolderPath, facilityName): + # return list of files to remove for this facility or empty list + blacklist = blacklistedByFacility.get(facilityName, []) + numIgnoredFiles = 0 for logFile in logFiles: - sourcePath = os.path.join(filePath, logFile.getName()) - shutil.copy2(sourcePath, targetFolderPath) - src = os.path.join(filePath, logFile.getName()) - shutil.copy2(src, targetFolderPath) + fileType = logFile.__class__.__name__ + if fileType in blacklist: + numIgnoredFiles += 1 + else: + copyFileTo(logFile, filePath, targetFolderPath) copiedContent = os.listdir(targetFolderPath) - if len(copiedContent) != len(logFiles): + if len(copiedContent) + numIgnoredFiles != len(logFiles): raise AssertionError("Not all log files have been copied successfully to target log folder.") def createLogFolder(targetPath): @@ -117,6 +130,10 @@ def createLogFolder(targetPath): os.makedirs(newLogFolder) return newLogFolder +# returns True if at least one value of the unclassified data map is not None +def containsUnclassifiedData(unclassifiedMap): + return not all(v is None 
for v in unclassifiedMap.values()) + def createExperimentFromMeasurement(transaction, currentPath, space, project, measurement, origin, rawDataPerSample): """ Register the experiment with samples in openBIS. In order to register the Nanopore experiment with its measurements in openBIS, @@ -162,10 +179,10 @@ def createExperimentFromMeasurement(transaction, currentPath, space, project, me datamap = rawDataPerSample.get(barcode) newLogFolder = createLogFolder(currentPath) # 3.) Aggregate all log files into an own log folder per measurement - copyLogFilesTo(measurement.getLogFiles(), currentPath, newLogFolder) + copyLogFilesTo(measurement.getLogFiles(), currentPath, newLogFolder, origin) createSampleWithData(transaction, space, barcode, datamap, runExperiment, currentPath, newLogFolder) unclassifiedMap = measurement.getUnclassifiedData() - if len(unclassifiedMap) > 0: + if containsUnclassifiedData(unclassifiedMap): registerUnclassifiedData(transaction, unclassifiedMap, runExperiment, currentPath, measurement.getFlowcellId()) # fills the global dictionary containing all checksums for paths from the global checksum file