From 3840f7de0231422c6859b6e01782980bd2593dcd Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Mon, 24 Jun 2024 13:41:12 +1000 Subject: [PATCH 1/8] parameterise start/end date --- inst/reports/seqrunsum/render.sh | 10 +++++----- inst/reports/seqrunsum/report.qmd | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/inst/reports/seqrunsum/render.sh b/inst/reports/seqrunsum/render.sh index 94f3f55..356685f 100644 --- a/inst/reports/seqrunsum/render.sh +++ b/inst/reports/seqrunsum/render.sh @@ -1,9 +1,9 @@ -start="2024-05-18" -end="2024-05-20" -out="seqrunsum_${start}_${end}.html" +date_start="2024-06-22" +date_end="2024-06-24" +out="seqrunsum_${date_start}_${date_end}.html" quarto render report.qmd \ - -P date_start:${start} \ - -P date_end:${end} \ + -P date_start:${date_start} \ + -P date_end:${date_end} \ -o ${out} \ --output-dir nogit diff --git a/inst/reports/seqrunsum/report.qmd b/inst/reports/seqrunsum/report.qmd index 856e5a0..e514355 100644 --- a/inst/reports/seqrunsum/report.qmd +++ b/inst/reports/seqrunsum/report.qmd @@ -27,8 +27,8 @@ format: body-width: 1300px params: title: "UMCCR PortalDB Workflow Summary" - date_start: "2024-06-15" - date_end: "2024-06-17" + date_start: "XXXX-XX-XX" + date_end: "XXXX-XX-XX" --- ```{r} From 6a05c3d753e9c44f70ec66cbda1d6e6749387768 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Mon, 1 Jul 2024 11:34:12 +1000 Subject: [PATCH 2/8] seqrunsum: disable rds write --- inst/reports/seqrunsum/render.sh | 4 ++-- inst/reports/seqrunsum/report.qmd | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/inst/reports/seqrunsum/render.sh b/inst/reports/seqrunsum/render.sh index 356685f..813d680 100644 --- a/inst/reports/seqrunsum/render.sh +++ b/inst/reports/seqrunsum/render.sh @@ -1,5 +1,5 @@ -date_start="2024-06-22" -date_end="2024-06-24" +date_start="2024-06-28" +date_end="2024-07-01" out="seqrunsum_${date_start}_${date_end}.html" quarto render report.qmd \ diff --git a/inst/reports/seqrunsum/report.qmd b/inst/reports/seqrunsum/report.qmd index e514355..723e6d9 100644 --- a/inst/reports/seqrunsum/report.qmd +++ b/inst/reports/seqrunsum/report.qmd @@ -1,6 +1,6 @@ --- title: "{{< meta params.title >}}" -subtitle: "Period: {{< meta params.date_start >}} to {{< meta params.date_end >}}" +subtitle: "Period: `r paste(params$date_start, ' to ', params$date_end)`" author: "UMCCR - Genomics Platform Group" date: now date-format: "YYYY-MM-DD HH:mm Z" @@ -86,14 +86,16 @@ wfs <- tibble::tribble( ``` ```{r} -#| label: query_workflow_table +#| label: aws_connect invisible(capture.output(rportal::awsvault_profile("upro"))) +``` + + +```{r} +#| label: query_workflow_table +#| message: false query_wf <- glue('WHERE "start" >= date(\'{dstart}\') AND "start" <= date(\'{dend}\') ORDER BY "start" DESC;') -pmeta_rds <- here(glue("nogit/data_portal/workflows/{as.Date(date_end)}.rds")) -# fs::dir_create(here(glue("nogit/data_portal/workflows"))) -# pmeta_raw <- rportal::portaldb_query_workflow(query_wf) -# saveRDS(pmeta_raw, file = pmeta_rds) -pmeta_raw <- readr::read_rds(pmeta_rds) +pmeta_raw <- rportal::portaldb_query_workflow(query_wf) # check there are no rogue unaccounted wfs run stopifnot(all(pmeta_raw[["type_name"]] %in% wfs[["name"]])) ``` @@ -120,13 +122,10 @@ sbjids <- funcs$get_ids(pmeta_tidy, "SubjectID") # not used anywhere (yet?) ```{r} #| label: query_limsrow_table -# fs::dir_create(here(glue("nogit/data_portal/lims"))) -lims_rds <- here(glue("nogit/data_portal/lims/{as.Date(date_end)}.rds")) +#| message: false libidsq <- paste(libids, collapse = "|") query_lims <- glue("WHERE REGEXP_LIKE(\"library_id\", '{libidsq}');") -# lims_raw <- rportal::portaldb_query_limsrow(query_lims) -# saveRDS(lims_raw, file = lims_rds) -lims_raw <- readr::read_rds(lims_rds) +lims_raw <- rportal::portaldb_query_limsrow(query_lims) lims <- lims_raw |> select( SubjectID = "subject_id", SampleID = "sample_id", LibraryID = "library_id", From e73744ae2f4f478576c7d337d234f2bff91bd620 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Mon, 15 Jul 2024 20:22:18 +1000 Subject: [PATCH 3/8] seqrunsum: recycle colours based on sbj count --- inst/reports/seqrunsum/funcs.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/reports/seqrunsum/funcs.R b/inst/reports/seqrunsum/funcs.R index f3f7886..6d41978 100644 --- a/inst/reports/seqrunsum/funcs.R +++ b/inst/reports/seqrunsum/funcs.R @@ -49,10 +49,10 @@ funcs <- list( ) |> ungroup() |> tidyr::unnest(clrs) |> + distinct(.data$clrs, .keep_all = TRUE) |> pull(clrs) - max_col <- length(clrs) - stopifnot(nc <= max_col, nc > 0) - clrs[seq_len(nc)] + # recycle colour vector according to nc + rep_len(clrs, length.out = nc) }, #----# get_sbj_url = function(x, colour = NULL, account = "pro") { From 984d4a4d6429dcf6f7821ebe6edf0c96706c406f Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Tue, 23 Jul 2024 17:06:43 +1000 Subject: [PATCH 4/8] alignqc: include index + illumina_id info --- R/meta_wgs_alignment_qc.R | 18 ++++++++++++++++-- R/meta_wts_alignment_qc.R | 18 ++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/R/meta_wgs_alignment_qc.R b/R/meta_wgs_alignment_qc.R index 3d4b6f4..dd985fe 100644 --- a/R/meta_wgs_alignment_qc.R +++ b/R/meta_wgs_alignment_qc.R @@ -27,22 +27,36 @@ meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") { meta_io_fromjson() |> dplyr::mutate( # input - rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")), + rgid = purrr::map_chr(.data$input, list("fastq_list_rows", "rgid")), rgsm = purrr::map_chr(.data$input, list("fastq_list_rows", "rgsm")), + rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")), lane = purrr::map_int(.data$input, list("fastq_list_rows", "lane")), lane = as.character(.data$lane), + # read_1/read_2 are dfs + fq1 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_1", "location"), .default = NA), + fq2 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_2", "location"), .default = NA), # output gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA), gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA), SubjectID = sub("umccr__automated__wgs_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name), + ) |> + tidyr::separate_wider_delim( + cols = "rgid", delim = ".", + names = c("index1", "index2", "lane2", "illumina_id", "sample_lib_id") ) + d |> dplyr::select( dplyr::all_of(meta_main_cols()), "SubjectID", LibraryID = "rglb", SampleID = "rgsm", - Lane = "lane", + "lane", + "index1", + "index2", + "illumina_id", + "fq1", + "fq2", "gds_outdir_dragen", "gds_outdir_multiqc", ) diff --git a/R/meta_wts_alignment_qc.R b/R/meta_wts_alignment_qc.R index ddba214..b236f8d 100644 --- a/R/meta_wts_alignment_qc.R +++ b/R/meta_wts_alignment_qc.R @@ -27,22 +27,36 @@ meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") { meta_io_fromjson() |> dplyr::mutate( # input - rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")), + rgid = purrr::map_chr(.data$input, list("fastq_list_rows", "rgid")), rgsm = purrr::map_chr(.data$input, list("fastq_list_rows", "rgsm")), + rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")), lane = purrr::map_int(.data$input, list("fastq_list_rows", "lane")), lane = as.character(.data$lane), + # read_1/read_2 are dfs + fq1 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_1", "location"), .default = NA), + fq2 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_2", "location"), .default = NA), # output gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA), gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA), SubjectID = sub("umccr__automated__wts_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name), + ) |> + tidyr::separate_wider_delim( + cols = "rgid", delim = ".", + names = c("index1", "index2", "lane2", "illumina_id", "sample_lib_id") ) + d |> dplyr::select( dplyr::all_of(meta_main_cols()), "SubjectID", LibraryID = "rglb", SampleID = "rgsm", - Lane = "lane", + "lane", + "index1", + "index2", + "illumina_id", + "fq1", + "fq2", "gds_outdir_dragen", "gds_outdir_multiqc", ) From 4e9e5fbc06ea50879a7b8107f0e69a95ee2ba21a Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Wed, 24 Jul 2024 00:47:20 +1000 Subject: [PATCH 5/8] seqrunsum: show FASTQ summary --- R/meta_wgs_alignment_qc.R | 2 +- R/meta_wts_alignment_qc.R | 2 +- inst/reports/seqrunsum/funcs.R | 38 ++++++++ inst/reports/seqrunsum/render.sh | 4 +- inst/reports/seqrunsum/report.qmd | 88 ++++++++++++++----- man/portaldb_query_limsrow.Rd | 6 +- ...ytest-testexamples-meta_wgs_alignment_qc.R | 2 +- ...ytest-testexamples-meta_wts_alignment_qc.R | 2 +- 8 files changed, 116 insertions(+), 28 deletions(-) diff --git a/R/meta_wgs_alignment_qc.R b/R/meta_wgs_alignment_qc.R index dd985fe..70284f0 100644 --- a/R/meta_wgs_alignment_qc.R +++ b/R/meta_wgs_alignment_qc.R @@ -10,7 +10,7 @@ #' readr::read_rds() #' (m <- meta_wgs_alignment_qc(pmeta)) #' @testexamples -#' expect_equal("Lane" %in% colnames(m), TRUE) +#' expect_equal("lane" %in% colnames(m), TRUE) #' @export meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") { # retrieve workflow runs with the given type and status diff --git a/R/meta_wts_alignment_qc.R b/R/meta_wts_alignment_qc.R index b236f8d..715f2fa 100644 --- a/R/meta_wts_alignment_qc.R +++ b/R/meta_wts_alignment_qc.R @@ -10,7 +10,7 @@ #' readr::read_rds() #' (m <- meta_wts_alignment_qc(pmeta)) #' @testexamples -#' expect_equal("Lane" %in% colnames(m), TRUE) +#' expect_equal("lane" %in% colnames(m), TRUE) #' @export meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") { # retrieve workflow runs with the given type and status diff --git a/inst/reports/seqrunsum/funcs.R b/inst/reports/seqrunsum/funcs.R index 6d41978..2ebc3e1 100644 --- a/inst/reports/seqrunsum/funcs.R +++ b/inst/reports/seqrunsum/funcs.R @@ -1,4 +1,42 @@ funcs <- list( + gds_fastqlistcsv_read = function(gdsdir, token, page_size = 20) { + .read_fastqlist <- function(x) { + nms <- tibble::tribble( + ~new_nm, ~old_nm, ~class, + "rgid", "RGID", "c", + "SampleID_LibraryID", "RGSM", "c", + "rglb", "RGLB", "c", + "lane", "Lane", "c", + "1", "Read1File", "c", + "2", "Read2File", "c" + ) + lookup <- tibble::deframe(nms[c("new_nm", "old_nm")]) + d <- readr::read_csv(x, col_types = readr::cols(.default = "c")) + assertthat::assert_that(all(colnames(d) == nms[["old_nm"]])) + d |> + dplyr::rename(dplyr::all_of(lookup)) |> + dplyr::mutate( + SampleID = sub("(.*)_(L.*)", "\\1", .data$SampleID_LibraryID), + LibraryID = sub("(.*)_(L.*)", "\\2", .data$SampleID_LibraryID), + topup = grepl("topup", .data$LibraryID) + ) |> + dplyr::select("rgid", "SampleID", "LibraryID", "lane", "1", "2", "topup") |> + tidyr::pivot_longer(c("1", "2"), names_to = "read", values_to = "path") + } + regex <- tibble::tribble( + ~regex, ~fun, + "fastq_list\\.csv$", "fastq_list" + ) + g <- dracarys::gds_files_list_filter_relevant( + gdsdir = gdsdir, token = token, pattern = NULL, include_url = TRUE, + page_size = page_size, regexes = regex + ) + assertthat::assert_that( + nrow(g) == 1, + all(colnames(g) == c("type", "bname", "size", "file_id", "path", "presigned_url")) + ) + .read_fastqlist(g$presigned_url) + }, #----# kable_empty_wf = function(wf) { kableExtra::kbl(NULL, caption = glue("NO {wf} WORKFLOWS WERE RUN"), escape = FALSE) |> diff --git a/inst/reports/seqrunsum/render.sh b/inst/reports/seqrunsum/render.sh index 813d680..f58cc92 100644 --- a/inst/reports/seqrunsum/render.sh +++ b/inst/reports/seqrunsum/render.sh @@ -1,5 +1,5 @@ -date_start="2024-06-28" -date_end="2024-07-01" +date_start="2024-07-20" +date_end="2024-07-22" out="seqrunsum_${date_start}_${date_end}.html" quarto render report.qmd \ diff --git a/inst/reports/seqrunsum/report.qmd b/inst/reports/seqrunsum/report.qmd index 723e6d9..05e2207 100644 --- a/inst/reports/seqrunsum/report.qmd +++ b/inst/reports/seqrunsum/report.qmd @@ -27,8 +27,8 @@ format: body-width: 1300px params: title: "UMCCR PortalDB Workflow Summary" - date_start: "XXXX-XX-XX" - date_end: "XXXX-XX-XX" + date_start: "2024-07-20" + date_end: "2024-07-22" --- ```{r} @@ -55,6 +55,19 @@ params: set.seed(42) ``` +```{r} +#| label: aws_connect +invisible(capture.output(rportal::awsvault_profile("upro"))) +``` + +```{r} +#| label: ica_token +# required for directly parsing fastq_list.csv and listing FASTQs on GDS +ica_token <- Sys.getenv("ICA_ACCESS_TOKEN") +``` + + + ```{r funcs_source} source(here("inst/reports/seqrunsum/funcs.R")) ``` @@ -85,12 +98,6 @@ wfs <- tibble::tribble( mutate(func = glue("rportal::meta_{name}")) ``` -```{r} -#| label: aws_connect -invisible(capture.output(rportal::awsvault_profile("upro"))) -``` - - ```{r} #| label: query_workflow_table #| message: false @@ -195,6 +202,59 @@ funcs$sbj_wf_count_tbl(pmeta_sumy) funcs$plot_vistime(pmeta_sumy) ``` +## FASTQ Summary + +Check FASTQs generated via the `bcl_convert` workflows. + +```{r} +wf <- "bcl_convert" +if (!chunks1[[wf]]) { + funcs$kable_empty_wf(wf) +} +``` + +```{r eval=chunks1[[wf]]} +#| label: fastq_summary + +# first list FASTQs in the bcl_convert output directories +gds_outdirs_fastq <- pmeta_tidy[[wf]] |> + select("gds_outdirs_fastq") |> + tidyr::unnest("gds_outdirs_fastq") |> + distinct() |> + mutate(outdirs_reports = file.path(.data$gds_outdirs_fastq, "Reports")) +gds_fastqs1 <- gds_outdirs_fastq |> + select("gds_outdirs_fastq") |> + rowwise() |> + mutate( + list_fqs = list( + dracarys::gds_files_list_fastq( + gdsdir = .data$gds_outdirs_fastq, token = ica_token, page_size = 200 + ) + ) + ) |> + ungroup() |> + tidyr::unnest("list_fqs") |> + select("path", "size", "size_chr", "size_num") +# now grab the Reports/fastq_list.csv files to check consistency +# across listed FASTQs and found FASTQs +gds_fastqs2 <- gds_outdirs_fastq |> + rowwise() |> + mutate(fq = list(funcs$gds_fastqlistcsv_read(.data$outdirs_reports, token = ica_token, page_size = 20))) |> + ungroup() |> + tidyr::unnest("fq") |> + mutate(path = file.path(dirname(.data$gds_outdirs_fastq), .data$path)) |> + select("LibraryID", "SampleID", "path", "topup", "read", "lane", "rgid") + +assertthat::assert_that(nrow(gds_fastqs1) == nrow(gds_fastqs2)) +assertthat::assert_that(all(gds_fastqs1$path %in% gds_fastqs2$path)) +# now join to get the file sizes +fq <- gds_fastqs2 |> + left_join(gds_fastqs1, by = "path") |> + select("LibraryID", "SampleID", "path", "size_num", "size_chr", "topup", "rgid", "read", "lane") + +funcs$dt_view(fq, id = "FASTQs") +``` + ## Workflow Metadata ```{r} @@ -216,7 +276,6 @@ SubjectID_def <- reactable::colDef(html = TRUE, minWidth = 120) ::: {.panel-tabset .nav-pills} ```{r} -#| echo: false wf <- "bcl_convert" ``` @@ -279,7 +338,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "wgs_alignment_qc" ``` @@ -309,7 +367,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "wts_alignment_qc" ``` @@ -339,7 +396,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "wts_tumor_only" ``` @@ -369,7 +425,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "wgs_tumor_normal" ``` @@ -404,7 +459,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "umccrise" ``` @@ -439,7 +493,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "rnasum" ``` @@ -469,7 +522,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "star_alignment" ``` @@ -499,7 +551,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "oncoanalyser_wts" ``` @@ -530,7 +581,6 @@ pmeta_tidy[[wf]] |> ```{r} -#| echo: false wf <- "oncoanalyser_wgs" ``` @@ -565,7 +615,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "oncoanalyser_wgts_existing_both" ``` @@ -601,7 +650,6 @@ pmeta_tidy[[wf]] |> ``` ```{r} -#| echo: false wf <- "sash" ``` diff --git a/man/portaldb_query_limsrow.Rd b/man/portaldb_query_limsrow.Rd index 1fcf1cc..cba3c96 100644 --- a/man/portaldb_query_limsrow.Rd +++ b/man/portaldb_query_limsrow.Rd @@ -25,8 +25,10 @@ libids <- shQuote(paste(c("L2400340", "L2400256"), collapse = "|")) query1 <- glue("WHERE REGEXP_LIKE(\"library_id\", {libids});") portaldb_query_limsrow(query1) sbjids <- paste(c("SBJ04470", "SBJ04487", "SBJ04488"), collapse = "|") -query2 <- glue("WHERE REGEXP_LIKE(\"subject_id\", '{sbjids}') AND \"type\" = 'WGS' ", - "AND \"phenotype\" = 'tumor' ORDER BY \"subject_id\" DESC;") +query2 <- glue( + "WHERE REGEXP_LIKE(\"subject_id\", '{sbjids}') AND \"type\" = 'WGS' ", + "AND \"phenotype\" = 'tumor' ORDER BY \"subject_id\" DESC;" +) d <- portaldb_query_limsrow(query2) # get tumor libids for each sbjid d |> dplyr::select(subject_id, library_id) diff --git a/tests/testthat/test-roxytest-testexamples-meta_wgs_alignment_qc.R b/tests/testthat/test-roxytest-testexamples-meta_wgs_alignment_qc.R index bfe4769..e00898b 100644 --- a/tests/testthat/test-roxytest-testexamples-meta_wgs_alignment_qc.R +++ b/tests/testthat/test-roxytest-testexamples-meta_wgs_alignment_qc.R @@ -8,6 +8,6 @@ test_that("Function meta_wgs_alignment_qc() @ L15", { system.file(package = "rportal") |> readr::read_rds() (m <- meta_wgs_alignment_qc(pmeta)) - expect_equal("Lane" %in% colnames(m), TRUE) + expect_equal("lane" %in% colnames(m), TRUE) }) diff --git a/tests/testthat/test-roxytest-testexamples-meta_wts_alignment_qc.R b/tests/testthat/test-roxytest-testexamples-meta_wts_alignment_qc.R index 7434438..f0a01ad 100644 --- a/tests/testthat/test-roxytest-testexamples-meta_wts_alignment_qc.R +++ b/tests/testthat/test-roxytest-testexamples-meta_wts_alignment_qc.R @@ -8,6 +8,6 @@ test_that("Function meta_wts_alignment_qc() @ L15", { system.file(package = "rportal") |> readr::read_rds() (m <- meta_wts_alignment_qc(pmeta)) - expect_equal("Lane" %in% colnames(m), TRUE) + expect_equal("lane" %in% colnames(m), TRUE) }) From 5c3b2c5e103c57662755d28d9aba0cf1df79eb83 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Thu, 25 Jul 2024 20:27:39 +1000 Subject: [PATCH 6/8] seqrunsum: show FASTQ summary table --- inst/reports/seqrunsum/report.qmd | 55 +++++++++++++++++++------------ 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/inst/reports/seqrunsum/report.qmd b/inst/reports/seqrunsum/report.qmd index 05e2207..2931535 100644 --- a/inst/reports/seqrunsum/report.qmd +++ b/inst/reports/seqrunsum/report.qmd @@ -138,9 +138,12 @@ lims <- lims_raw |> SubjectID = "subject_id", SampleID = "sample_id", LibraryID = "library_id", ExternalSubjectID = "external_subject_id", ProjectOwner = "project_owner", ProjectName = "project_name", - Type = "type", Assay = "assay", Phenotype = "phenotype", - Source = "source", Quality = "quality", Topup = "topup", Workflow = "workflow" - ) + Type = "type", + Phenotype = "phenotype", + Topup = "topup", Workflow = "workflow", + Assay = "assay", Source = "source", + ) |> + distinct() ``` ```{r} @@ -207,6 +210,7 @@ funcs$plot_vistime(pmeta_sumy) Check FASTQs generated via the `bcl_convert` workflows. ```{r} +SubjectID_def <- reactable::colDef(html = TRUE, minWidth = 120) wf <- "bcl_convert" if (!chunks1[[wf]]) { funcs$kable_empty_wf(wf) @@ -250,9 +254,19 @@ assertthat::assert_that(all(gds_fastqs1$path %in% gds_fastqs2$path)) # now join to get the file sizes fq <- gds_fastqs2 |> left_join(gds_fastqs1, by = "path") |> - select("LibraryID", "SampleID", "path", "size_num", "size_chr", "topup", "rgid", "read", "lane") + select("LibraryID", "SampleID", "path", "size_num", "size_chr", "topup", "rgid", "read", "lane") |> + left_join( + lims |> + select("SubjectID", "SampleID", "LibraryID") |> + distinct(), + by = c("LibraryID", "SampleID") + ) |> + select("SubjectID", "LibraryID", "SampleID", "size_num", "size_chr", "path", "topup", "rgid", "read", "lane", everything()) |> + left_join(clrs1 |> select("sbjid", "sbj_url"), by = c("SubjectID" = "sbjid")) |> + mutate(SubjectID = if_else(is.na(.data$sbj_url), funcs$get_sbj_url(.data$SubjectID), .data$sbj_url)) |> + arrange(desc(SubjectID)) -funcs$dt_view(fq, id = "FASTQs") +funcs$dt_view(fq, id = "FASTQs", columns = list(SubjectID = SubjectID_def)) ``` ## Workflow Metadata @@ -270,7 +284,6 @@ end_status_def <- reactable::colDef( list(color = color, fontweight = "bold") } ) -SubjectID_def <- reactable::colDef(html = TRUE, minWidth = 120) ``` ::: {.panel-tabset .nav-pills} @@ -301,7 +314,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "portal_run_id", "durationMin", "runfolder_name", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "batch_name", "Type", "Assay", "Source", "Quality", "Workflow", + "ProjectOwner", "ProjectName", "batch_name", "Type", "Assay", "Source", "Workflow", "start", "end", contains("gds_outdir") ) |> arrange(portal_run_id, desc(SubjectID), start) |> @@ -330,7 +343,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "gds_outdir" ) |> arrange(desc(SubjectID), start) |> @@ -358,8 +371,8 @@ pmeta_tidy[[wf]] |> durationMin = round(as.numeric(difftime(end, start, units = "mins"))) ) |> select( - "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "Lane", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "lane", "ExternalSubjectID", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "gds_outdir_dragen" ) |> arrange(desc(SubjectID), start) |> @@ -387,8 +400,8 @@ pmeta_tidy[[wf]] |> durationMin = round(as.numeric(difftime(end, start, units = "mins"))) ) |> select( - "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "Lane", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "lane", "ExternalSubjectID", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "gds_outdir_dragen" ) |> arrange(desc(SubjectID), start) |> @@ -417,7 +430,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", "ProjectOwner", "ProjectName", - "Type", "Assay", "Source", "Quality", "Workflow", + "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "gds_outdir_dragen" ) |> arrange(desc(SubjectID), start) |> @@ -451,7 +464,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", "ProjectOwner", "ProjectName", - "Type", "Assay", "Source", "Quality", "Workflow", + "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "gds_outdir_dragen_somatic", "gds_outdir_dragen_germline" ) |> arrange(desc(SubjectID), start) |> @@ -485,7 +498,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "gds_outdir_umccrise" ) |> arrange(desc(SubjectID), start) |> @@ -514,7 +527,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "rnasum_dataset", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "gds_outdir_rnasum", ) |> arrange(desc(SubjectID), start) |> @@ -543,7 +556,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", "ProjectOwner", "ProjectName", - "Type", "Assay", "Source", "Quality", "Workflow", + "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "s3_outdir_star" ) |> arrange(desc(SubjectID), start) |> @@ -572,7 +585,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", "ProjectOwner", "ProjectName", - "Type", "Assay", "Source", "Quality", "Workflow", + "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "s3_outdir_oncoanalyser" ) |> arrange(desc(SubjectID), start) |> @@ -607,7 +620,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "s3_outdir_oncoanalyser" ) |> arrange(desc(SubjectID), start) |> @@ -642,7 +655,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "WGTS", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "s3_outdir_oncoanalyser" ) |> arrange(desc(SubjectID), start) |> @@ -676,7 +689,7 @@ pmeta_tidy[[wf]] |> ) |> select( "end_status", "durationMin", "SubjectID", "LibraryID", "SampleID", "Phenotype", "ExternalSubjectID", - "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Quality", "Workflow", + "ProjectOwner", "ProjectName", "Type", "Assay", "Source", "Workflow", "portal_run_id", "wfr_id", "start", "end", "s3_outdir_sash" ) |> arrange(desc(SubjectID), start) |> From 6867bbd204401ab2d8bc9d987ae8f1012878bea4 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Mon, 29 Jul 2024 10:07:43 +1000 Subject: [PATCH 7/8] fix join --- inst/reports/seqrunsum/render.sh | 4 ++-- inst/reports/seqrunsum/report.qmd | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/inst/reports/seqrunsum/render.sh b/inst/reports/seqrunsum/render.sh index f58cc92..e63e218 100644 --- a/inst/reports/seqrunsum/render.sh +++ b/inst/reports/seqrunsum/render.sh @@ -1,5 +1,5 @@ -date_start="2024-07-20" -date_end="2024-07-22" +date_start="2024-07-27" +date_end="2024-07-29" out="seqrunsum_${date_start}_${date_end}.html" quarto render report.qmd \ diff --git a/inst/reports/seqrunsum/report.qmd b/inst/reports/seqrunsum/report.qmd index 2931535..6bbee15 100644 --- a/inst/reports/seqrunsum/report.qmd +++ b/inst/reports/seqrunsum/report.qmd @@ -27,8 +27,8 @@ format: body-width: 1300px params: title: "UMCCR PortalDB Workflow Summary" - date_start: "2024-07-20" - date_end: "2024-07-22" + date_start: "XXXX-XX-XX" + date_end: "XXXX-XX-XX" --- ```{r} @@ -66,8 +66,6 @@ invisible(capture.output(rportal::awsvault_profile("upro"))) ica_token <- Sys.getenv("ICA_ACCESS_TOKEN") ``` - - ```{r funcs_source} source(here("inst/reports/seqrunsum/funcs.R")) ``` @@ -257,11 +255,14 @@ fq <- gds_fastqs2 |> select("LibraryID", "SampleID", "path", "size_num", "size_chr", "topup", "rgid", "read", "lane") |> left_join( lims |> - select("SubjectID", "SampleID", "LibraryID") |> + select("SubjectID", "SampleID", "LibraryID", "Type", "Phenotype", "Workflow", "Assay") |> distinct(), by = c("LibraryID", "SampleID") ) |> - select("SubjectID", "LibraryID", "SampleID", "size_num", "size_chr", "path", "topup", "rgid", "read", "lane", everything()) |> + select( + "SubjectID", "LibraryID", "SampleID", "size_num", "size_chr", "Type", "Phenotype", + "Workflow", "Assay", "path", "topup", "rgid", "read", "lane", everything() + ) |> left_join(clrs1 |> select("sbjid", "sbj_url"), by = c("SubjectID" = "sbjid")) |> mutate(SubjectID = if_else(is.na(.data$sbj_url), funcs$get_sbj_url(.data$SubjectID), .data$sbj_url)) |> arrange(desc(SubjectID)) From 5b78dcbba67ac7ec4a8c02454df3e2e23fa43a58 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Mon, 5 Aug 2024 09:52:21 +1000 Subject: [PATCH 8/8] handle rnasum v1 renamed keys (fixes #16) --- R/meta_rnasum.R | 18 ++++++++++++++++++ inst/reports/seqrunsum/render.sh | 4 ++-- inst/reports/seqrunsum/report.qmd | 4 ++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/R/meta_rnasum.R b/R/meta_rnasum.R index 7c4febb..08e36c8 100644 --- a/R/meta_rnasum.R +++ b/R/meta_rnasum.R @@ -28,12 +28,30 @@ meta_rnasum <- function(pmeta, status = "Succeeded") { meta_io_fromjson() |> dplyr::mutate( # input + # renamed in v1.1.0 gds_indir_dragen = purrr::map_chr(.data$input, list("dragen_transcriptome_directory", "location"), .default = NA), + gds_indir_dragen = ifelse( + is.na(gds_indir_dragen), + purrr::map_chr(.data$input, list("dragen_wts_dir", "location"), .default = NA), + gds_indir_dragen + ), gds_indir_umccrise = purrr::map_chr(.data$input, list("umccrise_directory", "location"), .default = NA), + # renamed in v1.1.0 gds_indir_arriba = purrr::map_chr(.data$input, list("arriba_directory", "location"), .default = NA), + gds_indir_arriba = ifelse( + is.na(gds_indir_arriba), + purrr::map_chr(.data$input, list("arriba_dir", "location"), .default = NA), + gds_indir_arriba + ), rnasum_sample_name = purrr::map_chr(.data$input, "sample_name", .default = NA), rnasum_dataset = purrr::map_chr(.data$input, "dataset", .default = NA), rnasum_report_dir = purrr::map_chr(.data$input, "report_directory", .default = NA), + # renamed in v1.1.0 + rnasum_report_dir = ifelse( + is.na(rnasum_report_dir), + purrr::map_chr(.data$input, "report_dir", .default = NA), + rnasum_report_dir + ), sbjid1 = sub("(SBJ.*)__L.*", "\\1", .data$rnasum_report_dir), libid1 = sub("(SBJ.*)__(L.*)", "\\2", .data$rnasum_report_dir), # output diff --git a/inst/reports/seqrunsum/render.sh b/inst/reports/seqrunsum/render.sh index e63e218..25b00da 100644 --- a/inst/reports/seqrunsum/render.sh +++ b/inst/reports/seqrunsum/render.sh @@ -1,5 +1,5 @@ -date_start="2024-07-27" -date_end="2024-07-29" +date_start="2024-08-03" +date_end="2024-08-05" out="seqrunsum_${date_start}_${date_end}.html" quarto render report.qmd \ diff --git a/inst/reports/seqrunsum/report.qmd b/inst/reports/seqrunsum/report.qmd index 6bbee15..1092eb5 100644 --- a/inst/reports/seqrunsum/report.qmd +++ b/inst/reports/seqrunsum/report.qmd @@ -247,8 +247,8 @@ gds_fastqs2 <- gds_outdirs_fastq |> mutate(path = file.path(dirname(.data$gds_outdirs_fastq), .data$path)) |> select("LibraryID", "SampleID", "path", "topup", "read", "lane", "rgid") -assertthat::assert_that(nrow(gds_fastqs1) == nrow(gds_fastqs2)) -assertthat::assert_that(all(gds_fastqs1$path %in% gds_fastqs2$path)) +stopifnot(nrow(gds_fastqs1) == nrow(gds_fastqs2)) +stopifnot(all(gds_fastqs1$path %in% gds_fastqs2$path)) # now join to get the file sizes fq <- gds_fastqs2 |> left_join(gds_fastqs1, by = "path") |>