Skip to content

Commit

Permalink
Merge pull request #25 from umccr/datashare_atlas
Browse files: browse the repository at this point in the history
datashare: remove problematic json input string prior to parsing
  • Loading branch information
pdiakumis authored Oct 18, 2024
2 parents 9084529 + d016909 commit 42af0ed
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
^README\.Rmd$
^\.DS_Store$
^\.Rproj\.user$
^\.bumpversion\.cfg$
^\.bumpversion\.toml$
^\.dockerignore$
^\.dvc$
^\.dvcignore$
Expand Down
28 changes: 20 additions & 8 deletions R/datasharing.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
#'
#' @return Tibble with presigned URLs.
#' @examples
#' \dontrun{
#' sid <- "SBJ03144"
#' lid <- "L2301290"
#' datashare_um(sid, lid)
#' }
#' @export
datashare_um <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN")) {
sid_lid <- glue("{sid}__{lid}")
Expand Down Expand Up @@ -42,7 +44,7 @@ datashare_um <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN")) {
"REGEXP_LIKE(\"wfr_name\", 'umccr__automated__umccrise__{sid_lid}') ",
"ORDER BY \"start\" DESC;"
)
d_um_raw <- rportal::portaldb_query_workflow(query_um)
d_um_raw <- portaldb_query_workflow(query_um)
n_um_runs <- nrow(d_um_raw)
if (n_um_runs == 0) {
cli::cli_abort("No umccrise results found for {sid_lid}")
Expand All @@ -56,7 +58,7 @@ datashare_um <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN")) {
)
cli::cli_alert_info(msg)
}
d_um_tidy <- rportal::meta_umccrise(d_um_raw)
d_um_tidy <- meta_umccrise(d_um_raw)
um_dragen_input <- d_um_tidy[["gds_indir_dragen_somatic"]]
stopifnot(!is.na(um_dragen_input))

Expand All @@ -65,11 +67,11 @@ datashare_um <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN")) {
"REGEXP_LIKE(\"wfr_name\", 'umccr__automated__wgs_tumor_normal__{sid_lid}') ",
"ORDER BY \"start\" DESC;"
)
d_tn_raw <- rportal::portaldb_query_workflow(query_tn)
d_tn_raw <- portaldb_query_workflow(query_tn)
if (nrow(d_tn_raw) == 0) {
cli::cli_abort("No wgs_tumor_normal results found for {sid_lid}")
}
d_tn_tidy <- rportal::meta_wgs_tumor_normal(d_tn_raw)
d_tn_tidy <- meta_wgs_tumor_normal(d_tn_raw)
n_tn_runs <- nrow(d_tn_tidy)
if (n_tn_runs > 1) {
if (um_dragen_input %in% d_tn_tidy[["gds_outdir_dragen_somatic"]]) {
Expand Down Expand Up @@ -174,10 +176,19 @@ datashare_um <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN")) {
#' @param sid SubjectID.
#' @param lid LibraryID of WTS tumor.
#' @param token_ica ICA_ACCESS_TOKEN.
#' @param wfrn_prefix ICA workflow run name prefix. Specify if you need something
#' other than the default 'umccr__automated__wts_tumor_only'.
#'
#' @return Tibble with presigned URLs.
#' @examples
#' \dontrun{
#' datashare_wts(sid = "SBJ05560", lid = "L2401254")
#' datashare_wts(sid = "SBJ05424", lid = "L2401135", wfrn_prefix = "umccr__atlas__wts_tumor_only")
#' }
#'
#' @export
datashare_wts <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN")) {
datashare_wts <- function(sid, lid, wfrn_prefix = "umccr__automated__wts_tumor_only",
token_ica = Sys.getenv("ICA_ACCESS_TOKEN")) {
sid_lid <- glue("{sid}__{lid}")
wts_files <- dplyr::tribble(
~regex, ~fun,
Expand All @@ -194,10 +205,11 @@ datashare_wts <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN"))
)
query_wts <- glue(
"WHERE \"type_name\" = 'wts_tumor_only' AND \"end_status\" = 'Succeeded' AND ",
"REGEXP_LIKE(\"wfr_name\", 'umccr__automated__wts_tumor_only__{sid_lid}') ",
"REGEXP_LIKE(\"wfr_name\", '{wfrn_prefix}__{sid_lid}') ",
"ORDER BY \"start\" DESC;"
)
d_wts_raw <- rportal::portaldb_query_workflow(query_wts)
d_wts_raw <- portaldb_query_workflow(query_wts)

n_wts_runs <- nrow(d_wts_raw)
if (n_wts_runs == 0) {
cli::cli_abort("No WTS results found for {sid_lid}")
Expand All @@ -211,7 +223,7 @@ datashare_wts <- function(sid, lid, token_ica = Sys.getenv("ICA_ACCESS_TOKEN"))
)
cli::cli_alert_info(msg)
}
d_wts_tidy <- rportal::meta_wts_tumor_only(d_wts_raw)
d_wts_tidy <- meta_wts_tumor_only(d_wts_raw)
d_wts_urls1 <- d_wts_tidy[["gds_outdir_dragen"]] |>
dracarys::gds_list_files_filter_relevant(
token = token_ica, include_url = TRUE, page_size = 100, regexes = wts_files
Expand Down
10 changes: 8 additions & 2 deletions inst/scripts/datashare/datashare.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ suppressMessages(library(optparse, include.only = "make_option"))
option_list <- list(
optparse::make_option("--subject_id", type = "character", help = "Subject ID."),
optparse::make_option("--library_id_tumor", type = "character", help = "Library ID of tumor."),
optparse::make_option("--wts_wfrn_prefix",
type = "character", help = "ICA Workflow Run Name prefix. Use if other than the default.",
default = "umccr__automated__wts_tumor_only"
),
optparse::make_option("--wts", action = "store_true", type = "character", help = "This is a WTS library."),
optparse::make_option("--csv_output", type = "character", help = "CSV output path."),
optparse::make_option("--append", action = "store_true", help = "Append to existing file (or write to new one if file does not exist -- caution: no column headers are written)."),
Expand All @@ -16,7 +20,8 @@ opt <- optparse::parse_args(parser)
# library_id_tumor = "L2401254",
# wts = TRUE,
# csv_output = "FOO.csv",
# append = TRUE
# append = TRUE,
# wts_wfrn_prefix = "umccr__automated__wts_tumor_only"
# )

if (!is.null(opt[["version"]])) {
Expand Down Expand Up @@ -60,6 +65,7 @@ LibraryID_tumor <- opt[["library_id_tumor"]]
csv_output <- opt[["csv_output"]]
csv_append <- opt[["append"]]
wts <- opt[["wts"]]
wts_wfrn_prefix <- opt[["wts_wfrn_prefix"]]
fs::dir_create(dirname(csv_output))
cli::cli_alert_info("Start datasharing for {SubjectID}__{LibraryID_tumor}")

Expand All @@ -70,7 +76,7 @@ c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION", "ICA_ACCESS_TOKEN"
token_ica <- Sys.getenv("ICA_ACCESS_TOKEN") |> dracarys::ica_token_validate()

if (wts) {
urls <- rportal::datashare_wts(sid = SubjectID, lid = LibraryID_tumor, token_ica = token_ica)
urls <- rportal::datashare_wts(sid = SubjectID, lid = LibraryID_tumor, wfrn_prefix = wts_wfrn_prefix, token_ica = token_ica)
} else {
urls <- rportal::datashare_um(sid = SubjectID, lid = LibraryID_tumor, token_ica = token_ica)
}
Expand Down
5 changes: 3 additions & 2 deletions inst/scripts/datashare/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

set -euo pipefail

./datashare.R --subject_id SBJ03144 --library_id_tumor L2301290 --csv_output urls.csv
./datashare.R --subject_id SBJ04397 --library_id_tumor L2301291 --csv_output urls.csv --append
#./datashare.R --subject_id SBJ03144 --library_id_tumor L2301290 --csv_output urls.csv
#./datashare.R --subject_id SBJ04397 --library_id_tumor L2301291 --csv_output urls.csv --append
./datashare.R --wts --subject_id SBJ05560 --library_id_tumor L2401254 --csv_output urls.csv --append
./datashare.R --wts --subject_id SBJ05424 --library_id_tumor L2401135 --wts_wfrn_prefix umccr__atlas__wts_tumor_only --csv_output urls.csv --append
9 changes: 8 additions & 1 deletion man/datashare_um.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 16 additions & 1 deletion man/datashare_wts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/orca_libid2workflows.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 42af0ed

Please sign in to comment.