Skip to content

Commit

Permalink
Prepare release 1.11.0 (#96)
Browse files Browse the repository at this point in the history
* change ms conversion server

* remove some log files dependent on origin lab (#95)

Co-authored-by: Tobias Koch <[email protected]>

* handle data without unclassified reads

* Update CL (#92)

Co-authored-by: Tobias Koch <[email protected]>
Co-authored-by: Sven F <[email protected]>
  • Loading branch information
3 people authored May 9, 2022
1 parent e1cea11 commit 0a9e79a
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 10 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

# 1.11.0 2022-05-09

* Nanopore data: allows new result structure with barcode alignment file and 'other reports' folder (requires core-utils-lib 1.11.0 or later)
* Nanopore data: introduces a map of log files that should not be saved, depending on the datamover origin
* Fix for nanopore: data without unclassified reads no longer leads to an error upon registration
* Ptx mass spec data: change msconvert server

# 1.10.0 2021-07-26

* Provides new ETL for MTB project data that are not supposed to be stored in QUK17 [(#89)](https://github.com/qbicsoftware/etl-scripts/pull/89)
Expand Down
4 changes: 2 additions & 2 deletions drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
DROPBOX_PATH = "/mnt/DSS1/openbis_dss/QBiC-convert-register-ms-vendor-format/"
VENDOR_FORMAT_EXTENSIONS = {'.raw':'RAW_THERMO', '.d':'D_BRUKER'}#,'.wiff':'WIFF_SCIEX'}
WATERS_FORMAT = "RAW_WATERS"
MSCONVERT_HOST = "qmsconvert.am10.uni-tuebingen.de"
MSCONVERT_HOST = "qmsconvert2.am10.uni-tuebingen.de"
MSCONVERT_USER = "qbic"
REMOTE_BASE = "/cygdrive/d/etl-convert"
CONVERSION_TIMEOUT = 7200
Expand Down Expand Up @@ -971,4 +971,4 @@ def process(transaction):

for f in os.listdir(incomingPath):
if ".testorig" in f:
os.remove(os.path.realpath(os.path.join(incomingPath, f)))
os.remove(os.path.realpath(os.path.join(incomingPath, f)))
33 changes: 25 additions & 8 deletions drop-boxes/register-nanopore-dropbox/register-nanopore.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
usedExperimentIdentifiers = set()
checksumMap = {}

# facilities that want us to remove certain log files
blacklistedByFacility = {"qeana03-imgagdna": ["DutyTimeLog", "FinalSummaryLog", "ReportMdLog", "ReportPDFLog", "ThroughputLog", "DriftCorrectionLog", "MuxScanDataLog"]}

def createNewSample(transaction, space, parentSampleCode):
run = 0
sampleExists = True
Expand Down Expand Up @@ -100,15 +103,26 @@ def getTimeStamp():
ts = str(now.minute)+str(now.second)+str(now.microsecond)
return ts

def copyFileTo(file, filePath, targetFolderPath):
sourcePath = os.path.join(filePath, file.getName())
shutil.copy2(sourcePath, targetFolderPath)
src = os.path.join(filePath, file.getName())
shutil.copy2(src, targetFolderPath)

# copies log files from a folder that may contain other files to another path
def copyLogFilesTo(logFiles, filePath, targetFolderPath):
# log files that are blacklisted are not copied and thus not registered (after metadata extraction)
def copyLogFilesTo(logFiles, filePath, targetFolderPath, facilityName):
# log file types to skip for this facility (empty list if the facility has no blacklist)
blacklist = blacklistedByFacility.get(facilityName, [])
numIgnoredFiles = 0
for logFile in logFiles:
sourcePath = os.path.join(filePath, logFile.getName())
shutil.copy2(sourcePath, targetFolderPath)
src = os.path.join(filePath, logFile.getName())
shutil.copy2(src, targetFolderPath)
fileType = logFile.__class__.__name__
if fileType in blacklist:
numIgnoredFiles += 1
else :
copyFileTo(logFile, filePath, targetFolderPath)
copiedContent = os.listdir(targetFolderPath)
if len(copiedContent) != len(logFiles):
if len(copiedContent) + numIgnoredFiles != len(logFiles):
raise AssertionError("Not all log files have been copied successfully to target log folder.")

def createLogFolder(targetPath):
Expand All @@ -117,6 +131,9 @@ def createLogFolder(targetPath):
os.makedirs(newLogFolder)
return newLogFolder

def containsUnclassifiedData(unclassifiedMap):
    """ Tell whether a map of unclassified data holds anything to register.

    The map counts as containing data as soon as at least one of its values
    is not None. An empty map, a map whose values are all None, or a missing
    map (None) all mean that there is no unclassified data.

    :param unclassifiedMap: mapping whose values are inspected; may be None
    :return: True if at least one value is not None, False otherwise
    """
    # A missing map is treated like an empty one instead of failing on .values()
    if unclassifiedMap is None:
        return False
    return any(value is not None for value in unclassifiedMap.values())

def createExperimentFromMeasurement(transaction, currentPath, space, project, measurement, origin, rawDataPerSample):
""" Register the experiment with samples in openBIS.
In order to register the Nanopore experiment with its measurements in openBIS,
Expand Down Expand Up @@ -162,10 +179,10 @@ def createExperimentFromMeasurement(transaction, currentPath, space, project, me
datamap = rawDataPerSample.get(barcode)
newLogFolder = createLogFolder(currentPath)
# 3.) Aggregate all log files into an own log folder per measurement
copyLogFilesTo(measurement.getLogFiles(), currentPath, newLogFolder)
copyLogFilesTo(measurement.getLogFiles(), currentPath, newLogFolder, origin)
createSampleWithData(transaction, space, barcode, datamap, runExperiment, currentPath, newLogFolder)
unclassifiedMap = measurement.getUnclassifiedData()
if len(unclassifiedMap) > 0:
if containsUnclassifiedData(unclassifiedMap):
registerUnclassifiedData(transaction, unclassifiedMap, runExperiment, currentPath, measurement.getFlowcellId())

# fills the global dictionary containing all checksums for paths from the global checksum file
Expand Down

0 comments on commit 0a9e79a

Please sign in to comment.