Update input_assure.py to include additional check for multiple keys
kylacochrane committed Jun 10, 2024
1 parent 23c1397 commit c7252cf
Showing 3 changed files with 42 additions and 26 deletions.
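
In brief: the script previously assumed the MLST JSON file contained exactly one top-level key and only checked whether the sample ID appeared among the keys. This commit adds handling for files with multiple keys, trimming them down to a single entry. A minimal standalone sketch of the added behaviour (the input dict and sample ID below are hypothetical; the real script also writes an error report CSV and saves the updated JSON back to disk):

import json

# Hypothetical MLST JSON contents with an unexpected second key
json_data = {"sampleQ": {"st": "1"}, "extra_key": {"st": "2"}}
sample_id = "sampleQ"

keys = list(json_data.keys())
if len(keys) > 1:
    if sample_id in json_data:
        # Keep only the entry matching the sample ID
        json_data = {sample_id: json_data[sample_id]}
    else:
        # No key matches: rename the first key and drop the rest
        json_data = {sample_id: json_data.pop(keys[0])}

print(json.dumps(json_data))  # {"sampleQ": {"st": "1"}}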
52 changes: 34 additions & 18 deletions bin/input_assure.py
@@ -2,7 +2,6 @@
 
 import json
 import argparse
-import sys
 import csv
 import gzip
 
@@ -14,31 +13,48 @@ def open_file(file_path, mode):
         return open(file_path, mode)
 
 def check_inputs(json_file, sample_id, address, output_error_file):
-    # Define a variable to store the match_status (True or False)
-    with open(json_file, "rt") as f:
+    with open_file(json_file, "rt") as f:
         json_data = json.load(f)
-        match_status = sample_id in json_data
 
-    # Define the original key in the JSON data
-    original_key = list(json_data.keys())[0]
+    # Define a variable to store the match_status (True or False)
+    match_status = sample_id in json_data
+
+    keys = list(json_data.keys())
+    original_key = keys[0]
+
+    # Initialize the error message
+    error_message = None
+
+    # Check for multiple keys in the JSON file and define error message
+    if len(keys) > 1:
+        # Check if sample_id matches any key
+        if not match_status:
+            error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed."
+            # Retain only the specified sample ID
+            json_data = {sample_id: json_data.pop(original_key)}
+        else:
+            error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry"
+            # Remove all keys except the one matching sample_id
+            json_data = {sample_id: json_data[sample_id]}
+    elif not match_status:
+        # Define error message based on meta.address (query or reference)
+        if address == "null":
+            error_message = f"Query {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
+        else:
+            error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
+        # Update the JSON file with the new sample ID
+        json_data[sample_id] = json_data.pop(original_key)
 
-    # Write sample ID and JSON key to error report CSV if not matched; include error message
-    if not match_status:
-        if address == "null":
-            error_message = f"Query {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
-        else:
-            error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
+    # Write file containing relevant error messages
+    if error_message:
         with open(output_error_file, "w", newline="") as f:
             writer = csv.writer(f)
             writer.writerow(["sample", "JSON_key", "error_message"])
-            writer.writerow([sample_id, original_key, error_message])
+            writer.writerow([sample_id, keys, error_message])
 
-    # Update the JSON file with the new sample ID
-    json_data[sample_id] = json_data.pop(original_key)
-    with open(json_file, "wt") as f:
-        json.dump(json_data, f, indent=4)
+    # Write the updated JSON data back to the original file
+    with open_file(json_file, "wt") as f:
+        json.dump(json_data, f, indent=4)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
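A note on `open_file`: only its closing `return open(file_path, mode)` is visible at the top of the hunk above, but the commit now routes both the initial read and the final write through it instead of the built-in `open`. Since the script imports `gzip`, the helper is presumably gzip-aware; a sketch under that assumption (the full body is truncated in this view):

import gzip

def open_file(file_path, mode):
    # Assumed implementation: use gzip for .gz paths, plain open otherwise.
    # Only the final `return` statement is visible in the diff above.
    if file_path.endswith(".gz"):
        return gzip.open(file_path, mode)
    return open(file_path, mode)

# Example: both of these hypothetical paths would then be read transparently:
# with open_file("sampleQ.mlst.json", "rt") as f: ...
# with open_file("sampleQ.mlst.json.gz", "rt") as f: ...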
6 changes: 3 additions & 3 deletions modules/local/input_assure/main.nf
@@ -1,5 +1,5 @@
 process INPUT_ASSURE {
-    tag "Check Sample Inputs and Generate Error Report"
+    tag "Assures Inputs are Consistent"
     label 'process_single'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -10,14 +10,14 @@ process INPUT_ASSURE {
     tuple val(meta), path(mlst)
 
     output:
-    tuple val(meta), path(mlst), emit: match
+    tuple val(meta), path(mlst), emit: result
     tuple val(meta), path("*_error_report.csv"), optional: true, emit: error_report
     path("versions.yml"), emit: versions
 
     script:
 
     """
-    input_check.py \\
+    input_assure.py \\
         --input ${mlst} \\
         --sample_id ${meta.id} \\
         --address ${meta.address} \\
10 changes: 5 additions & 5 deletions workflows/gas_nomenclature.nf
@@ -72,15 +72,15 @@ workflow GAS_NOMENCLATURE {
     input = Channel.fromSamplesheet("input")
 
     // Ensure meta.id and mlst_file keys match; generate error report for samples where id ≠ key
-    id_key = INPUT_ASSURE(input)
-    ch_versions = ch_versions.mix(id_key.versions)
+    input_assure = INPUT_ASSURE(input)
+    ch_versions = ch_versions.mix(input_assure.versions)
 
     // Prepare reference and query TSV files for LOCIDEX_MERGE
-    profiles = id_key.match.branch {
+    profiles = input_assure.result.branch {
         query: !it[0].address
     }
-    reference_values = input.collect{ meta, profile -> profile}
-    query_values = profiles.query.collect{ meta, profile -> profile }
+    reference_values = input_assure.result.collect{ meta, mlst -> mlst}
+    query_values = profiles.query.collect{ meta, mlst -> mlst }
 
     // LOCIDEX modules
     ref_tag = Channel.value("ref")
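For readers less familiar with Nextflow, the renamed channels above split INPUT_ASSURE's output by whether `meta.address` is set: samples without an address are treated as queries, while every sample's MLST file feeds the reference merge. A rough Python analogue of the `branch`/`collect` logic (the tuples below are hypothetical stand-ins for channel items):

# Each channel item is a (meta, mlst_path) tuple, as emitted by INPUT_ASSURE
channel = [
    ({"id": "sampleQ", "address": None}, "sampleQ.mlst.json"),     # query: no address
    ({"id": "sampleR", "address": "1.1.1"}, "sampleR.mlst.json"),  # reference
]

# profiles.query <- items whose meta has no address (cf. `query: !it[0].address`)
query = [item for item in channel if not item[0]["address"]]

reference_values = [mlst for meta, mlst in channel]  # all samples feed the reference merge
query_values = [mlst for meta, mlst in query]        # only queries feed the query merge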
