Skip to content

Commit

Permalink
Merge pull request #17 from umccr/seqrunsum_cli
Browse files Browse the repository at this point in the history
Add FASTQ summary, fix colours, handle RNAsum v1
  • Loading branch information
pdiakumis authored Aug 6, 2024
2 parents 9b2742f + 5b78dcb commit c6a2245
Show file tree
Hide file tree
Showing 9 changed files with 210 additions and 63 deletions.
18 changes: 18 additions & 0 deletions R/meta_rnasum.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,30 @@ meta_rnasum <- function(pmeta, status = "Succeeded") {
meta_io_fromjson() |>
dplyr::mutate(
# input
# renamed in v1.1.0
gds_indir_dragen = purrr::map_chr(.data$input, list("dragen_transcriptome_directory", "location"), .default = NA),
gds_indir_dragen = ifelse(
is.na(gds_indir_dragen),
purrr::map_chr(.data$input, list("dragen_wts_dir", "location"), .default = NA),
gds_indir_dragen
),
gds_indir_umccrise = purrr::map_chr(.data$input, list("umccrise_directory", "location"), .default = NA),
# renamed in v1.1.0
gds_indir_arriba = purrr::map_chr(.data$input, list("arriba_directory", "location"), .default = NA),
gds_indir_arriba = ifelse(
is.na(gds_indir_arriba),
purrr::map_chr(.data$input, list("arriba_dir", "location"), .default = NA),
gds_indir_arriba
),
rnasum_sample_name = purrr::map_chr(.data$input, "sample_name", .default = NA),
rnasum_dataset = purrr::map_chr(.data$input, "dataset", .default = NA),
rnasum_report_dir = purrr::map_chr(.data$input, "report_directory", .default = NA),
# renamed in v1.1.0
rnasum_report_dir = ifelse(
is.na(rnasum_report_dir),
purrr::map_chr(.data$input, "report_dir", .default = NA),
rnasum_report_dir
),
sbjid1 = sub("(SBJ.*)__L.*", "\\1", .data$rnasum_report_dir),
libid1 = sub("(SBJ.*)__(L.*)", "\\2", .data$rnasum_report_dir),
# output
Expand Down
20 changes: 17 additions & 3 deletions R/meta_wgs_alignment_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' readr::read_rds()
#' (m <- meta_wgs_alignment_qc(pmeta))
#' @testexamples
#' expect_equal("Lane" %in% colnames(m), TRUE)
#' expect_equal("lane" %in% colnames(m), TRUE)
#' @export
meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") {
# retrieve workflow runs with the given type and status
Expand All @@ -27,22 +27,36 @@ meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") {
meta_io_fromjson() |>
dplyr::mutate(
# input
rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")),
rgid = purrr::map_chr(.data$input, list("fastq_list_rows", "rgid")),
rgsm = purrr::map_chr(.data$input, list("fastq_list_rows", "rgsm")),
rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")),
lane = purrr::map_int(.data$input, list("fastq_list_rows", "lane")),
lane = as.character(.data$lane),
# read_1/read_2 are dfs
fq1 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_1", "location"), .default = NA),
fq2 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_2", "location"), .default = NA),
# output
gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA),
gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
SubjectID = sub("umccr__automated__wgs_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
) |>
tidyr::separate_wider_delim(
cols = "rgid", delim = ".",
names = c("index1", "index2", "lane2", "illumina_id", "sample_lib_id")
)

d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"SubjectID",
LibraryID = "rglb",
SampleID = "rgsm",
Lane = "lane",
"lane",
"index1",
"index2",
"illumina_id",
"fq1",
"fq2",
"gds_outdir_dragen",
"gds_outdir_multiqc",
)
Expand Down
20 changes: 17 additions & 3 deletions R/meta_wts_alignment_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' readr::read_rds()
#' (m <- meta_wts_alignment_qc(pmeta))
#' @testexamples
#' expect_equal("Lane" %in% colnames(m), TRUE)
#' expect_equal("lane" %in% colnames(m), TRUE)
#' @export
meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") {
# retrieve workflow runs with the given type and status
Expand All @@ -27,22 +27,36 @@ meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") {
meta_io_fromjson() |>
dplyr::mutate(
# input
rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")),
rgid = purrr::map_chr(.data$input, list("fastq_list_rows", "rgid")),
rgsm = purrr::map_chr(.data$input, list("fastq_list_rows", "rgsm")),
rglb = purrr::map_chr(.data$input, list("fastq_list_rows", "rglb")),
lane = purrr::map_int(.data$input, list("fastq_list_rows", "lane")),
lane = as.character(.data$lane),
# read_1/read_2 are dfs
fq1 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_1", "location"), .default = NA),
fq2 = purrr::map_chr(.data$input, list("fastq_list_rows", "read_2", "location"), .default = NA),
# output
gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA),
gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
SubjectID = sub("umccr__automated__wts_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
) |>
tidyr::separate_wider_delim(
cols = "rgid", delim = ".",
names = c("index1", "index2", "lane2", "illumina_id", "sample_lib_id")
)

d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"SubjectID",
LibraryID = "rglb",
SampleID = "rgsm",
Lane = "lane",
"lane",
"index1",
"index2",
"illumina_id",
"fq1",
"fq2",
"gds_outdir_dragen",
"gds_outdir_multiqc",
)
Expand Down
44 changes: 41 additions & 3 deletions inst/reports/seqrunsum/funcs.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,42 @@
funcs <- list(
# Read the single `fastq_list.csv` found under a GDS directory.
#
# Lists files under `gdsdir` via `dracarys::gds_files_list_filter_relevant()`
# (keeping only paths matching `fastq_list\.csv$`), asserts that exactly one
# file was found, then reads it from its presigned URL.
#
# Args:
#   gdsdir: GDS directory to search (passed through to dracarys).
#   token: access token (passed through to dracarys).
#   page_size: page size for the file listing (passed through to dracarys).
#
# Returns a tibble with columns rgid, SampleID, LibraryID, lane, topup, read
# and path: one row per (FASTQ list row, read), where `read` is "1" or "2"
# and `path` is the corresponding Read1File/Read2File value.
#
# Fails (assertthat) if the listing does not have exactly one row, or if the
# listing or the CSV do not have exactly the expected columns, in order.
gds_fastqlistcsv_read = function(gdsdir, token, page_size = 20) {
  .read_fastqlist <- function(x) {
    # new name = old name, in the column order expected in the CSV
    lookup <- c(
      "rgid" = "RGID",
      "SampleID_LibraryID" = "RGSM",
      "rglb" = "RGLB",
      "lane" = "Lane",
      "1" = "Read1File",
      "2" = "Read2File"
    )
    # read everything as character
    d <- readr::read_csv(x, col_types = readr::cols(.default = "c"))
    # identical() rather than all(a == b): `==` recycles on length mismatch and
    # all(logical(0)) is TRUE, so a CSV without columns would pass the check
    assertthat::assert_that(identical(colnames(d), unname(lookup)))
    d |>
      dplyr::rename(dplyr::all_of(lookup)) |>
      dplyr::mutate(
        # RGSM is split on the last "_L" into sample and library parts
        SampleID = sub("(.*)_(L.*)", "\\1", .data$SampleID_LibraryID),
        LibraryID = sub("(.*)_(L.*)", "\\2", .data$SampleID_LibraryID),
        topup = grepl("topup", .data$LibraryID)
      ) |>
      dplyr::select(
        "rgid", "SampleID", "LibraryID", "lane", "1", "2", "topup"
      ) |>
      # one row per read: "1"/"2" column names go to `read`, values to `path`
      tidyr::pivot_longer(c("1", "2"), names_to = "read", values_to = "path")
  }
  regex <- tibble::tribble(
    ~regex, ~fun,
    "fastq_list\\.csv$", "fastq_list"
  )
  g <- dracarys::gds_files_list_filter_relevant(
    gdsdir = gdsdir, token = token, pattern = NULL, include_url = TRUE,
    page_size = page_size, regexes = regex
  )
  expected_cols <- c(
    "type", "bname", "size", "file_id", "path", "presigned_url"
  )
  assertthat::assert_that(
    nrow(g) == 1,
    identical(colnames(g), expected_cols)
  )
  .read_fastqlist(g$presigned_url)
},
#----#
kable_empty_wf = function(wf) {
kableExtra::kbl(NULL, caption = glue("<strong>NO {wf} WORKFLOWS WERE RUN</strong>"), escape = FALSE) |>
Expand Down Expand Up @@ -49,10 +87,10 @@ funcs <- list(
) |>
ungroup() |>
tidyr::unnest(clrs) |>
distinct(.data$clrs, .keep_all = TRUE) |>
pull(clrs)
max_col <- length(clrs)
stopifnot(nc <= max_col, nc > 0)
clrs[seq_len(nc)]
# recycle colour vector according to nc
rep_len(clrs, length.out = nc)
},
#----#
get_sbj_url = function(x, colour = NULL, account = "pro") {
Expand Down
10 changes: 5 additions & 5 deletions inst/reports/seqrunsum/render.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
start="2024-05-18"
end="2024-05-20"
out="seqrunsum_${start}_${end}.html"
date_start="2024-08-03"
date_end="2024-08-05"
out="seqrunsum_${date_start}_${date_end}.html"

quarto render report.qmd \
-P date_start:${start} \
-P date_end:${end} \
-P date_start:${date_start} \
-P date_end:${date_end} \
-o ${out} \
--output-dir nogit
Loading

0 comments on commit c6a2245

Please sign in to comment.