Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding warning for access edge cases when linking #70

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified Makefile
100644 → 100755
Empty file.
Empty file modified README.md
100644 → 100755
Empty file.
Empty file modified apps/__init__.py
100644 → 100755
Empty file.
52 changes: 47 additions & 5 deletions apps/access/__init__.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from pathlib import Path
import shutil
import requests

import re
import json
FLAG_TO_APPS = {
"dmpmanifest": ("access_manifest", "manifest"),
"msi": ("access legacy MSI", "microsatellite_instability"),
Expand Down Expand Up @@ -54,6 +55,7 @@ def get_operator_run(app_name, app_version=None, tags=None, config=None, show_al
"app_name": app_name
}

operator_look_ahead(latest_operator_run, config, tags)
if show_all_runs:
latest_operator_run.pop("status")

Expand All @@ -69,13 +71,34 @@ def get_operator_run(app_name, app_version=None, tags=None, config=None, show_al
if "igoRequestId" in tags:
new_tag = tags.replace("igoRequestId", "requestId")
return get_operator_run(app_name, app_version, tags=new_tag, config=config)

else:
print("There are no completed operator runs for this request in the following app: %s:%s" %
(str(app_name), str(app_version)), file=sys.stderr)
return None

return latest_runs[0]

def operator_look_ahead(latest_operator_run, config, tags):
    """Warn when another operator run for the request is almost complete.

    Queries the operator-runs endpoint with the same filters as
    ``latest_operator_run`` but without the ``status`` filter, then inspects
    the first result. If at least 90% but fewer than 100% of that operator
    run's runs are completed, a warning is printed to stderr.

    Args:
        latest_operator_run: dict of query parameters for the operator-runs
            endpoint. It is copied, never modified.
        config: mapping providing ``beagle_endpoint``,
            ``api['operator-runs']`` and ``token``.
        tags: JSON string expected to carry ``igoRequestId`` or
            ``requestId``; only used to name the request in the warning.

    Returns:
        None. The only effect is the optional warning.
    """
    threshold = 0.90
    complete = 1.0

    # The request id only decorates the warning, so unparsable or missing
    # tags must not abort the caller.
    try:
        parsed_tags = json.loads(tags)
    except (TypeError, ValueError):
        parsed_tags = {}
    if not isinstance(parsed_tags, dict):
        parsed_tags = {}
    request_id = parsed_tags.get('igoRequestId', parsed_tags.get('requestId'))

    # Drop the status filter on a copy so operator runs that have not
    # finished are visible to the query; the caller's dict stays untouched.
    look_ahead_params = dict(latest_operator_run)
    look_ahead_params.pop("status", None)

    response_look_ahead = requests.get(
        urljoin(config['beagle_endpoint'], config['api']['operator-runs']),
        headers={'Authorization': 'Bearer %s' % config['token']},
        params=look_ahead_params)

    # Parse the body once instead of once per field.
    results = response_look_ahead.json()["results"]
    if not results:
        return

    # NOTE(review): presumably the endpoint lists the newest operator run
    # first - confirm against the API's default ordering.
    newest = results[0]
    total_r = newest["num_total_runs"]
    completed_r = newest["num_completed_runs"]
    if not total_r or completed_r is None:
        # No runs to measure progress against; avoids dividing by zero.
        return

    percent_c = completed_r / total_r
    if threshold <= percent_c < complete:
        # stderr keeps the warning out of stdout, which the linking code
        # uses for the paths it creates.
        print(f"Warning there is a more recent operator run for request {request_id} that is incomplete, but with {percent_c} of runs completed. This may be the operator run you need for analysis. Consult the request's operator run history and consider using the --all-runs flag if appropriate.", file=sys.stderr)


def open_request_file(request_ids_file):
try:
with open(request_ids_file,'r') as file:
Expand Down Expand Up @@ -114,13 +137,11 @@ def get_runs(operator_run_id, config, show_all_runs):
"page_size": 1000,
"status": "COMPLETED"
}

if show_all_runs:
run_params.pop("status")

response = requests.get(urljoin(config['beagle_endpoint'], config['api']['run']),
headers={'Authorization': 'Bearer %s' % config['token']}, params=run_params)

return response.json()["results"]

def get_run_by_id(run_id, config):
Expand Down Expand Up @@ -162,6 +183,7 @@ def link_app(operator_run, directory, request_id, sample_id, arguments, config,
except Exception as e:
print("could not delete symlink: {} ".format(path / run["id"]), file=sys.stderr)
else:
is_run_manual(run, request_id)
try:
os.symlink(run["output_directory"], path / run["id"])
print((path / run["id"]).absolute(), file=sys.stdout)
Expand Down Expand Up @@ -287,14 +309,20 @@ def link_bams_by_patient_id(operator_run, directory, request_id, sample_id, argu

if not runs:
return

add_bai=False
if len(runs) == 1:
manual_name="Run Access Legacy Fastq to Bam (file outputs) - Manual"
if manual_name in runs[0]["name"]:
add_bai=True

files = [] # (sample_id, /path/to/file)

for run in runs:
for file_group in get_files_by_run_id(run["id"], config):
files = files + find_files_by_sample(file_group["value"], sample_id=sample_id)

accepted_file_types = ['.bam', '.bai']

for (sample_id, file) in files:
file_path = get_file_path(file)
_, file_ext = os.path.splitext(file_path)
Expand Down Expand Up @@ -323,6 +351,10 @@ def link_bams_by_patient_id(operator_run, directory, request_id, sample_id, argu
try:
os.symlink(file_path, sample_version_path / file_name)
print((sample_version_path / file_name).absolute(), file=sys.stdout)
if add_bai:
file_name_index = file_name.replace(".bam", ".bai")
os.symlink(file_path, sample_version_path / file_name_index)
print((sample_version_path / file_name_index).absolute(), file=sys.stdout)
except Exception as e:
print("Could not create symlink from '{}' to '{}'".format(sample_version_path / file_name, file_path), file=sys.stderr)
continue
Expand All @@ -339,6 +371,16 @@ def link_bams_by_patient_id(operator_run, directory, request_id, sample_id, argu
pass

return "Completed"
def is_run_manual(run, request_id):
    """Check whether a run's output lives in the manually imported bams area.

    A run counts as manual when its ``output_directory`` is
    ``/work/access/production/data/bams`` or a path below it. For such a run
    the ``current`` sub-directory must already exist.

    Args:
        run: dict with an ``output_directory`` entry.
        request_id: request identifier, used only in the error message.

    Returns:
        True if the run is manual and ``<output_directory>/current`` is a
        directory; False if the run is not under the manual bams area.

    Raises:
        FileNotFoundError: the run is manual but
            ``<output_directory>/current`` does not exist.
    """
    manual_root = "/work/access/production/data/bams"
    output_dir = run["output_directory"]
    if not output_dir:
        # A run without an output directory cannot be a manual import.
        return False

    # Plain prefix test on a path boundary. The earlier glob-style pattern
    # was interpreted as a regex and also matched siblings like "bams_x".
    if output_dir != manual_root and not output_dir.startswith(manual_root + "/"):
        return False

    patient_dir = output_dir + '/current'
    if Path(patient_dir).is_dir():
        return True
    raise FileNotFoundError(f'The folder {patient_dir} does not exist. Bams for request {request_id} were manually imported to Voyager. Please link patients in the data folder before linking the project folder.')

def find_files_by_sample(file_group, sample_id = None):
def traverse(file_group):
Expand Down
Empty file modified apps/cleaning/__init__.py
100644 → 100755
Empty file.
Empty file modified apps/cmoch/__init__.py
100644 → 100755
Empty file.
Empty file modified apps/lims.py
100644 → 100755
Empty file.
Empty file modified requirements.txt
100644 → 100755
Empty file.
Empty file modified scripts/README.md
100644 → 100755
Empty file.
Empty file modified scripts/bin/access_beagle_endpoint.py
100644 → 100755
Empty file.