Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sample and library metadata to colData for merged objects #630

Merged
merged 20 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions .github/workflows/nextflow-stub-check.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

name: Check nextflow stub

on:
Expand All @@ -11,7 +10,6 @@ jobs:
nf-stub-check:
runs-on: ubuntu-22.04
steps:

- name: Checkout repo
uses: actions/checkout@v3

Expand All @@ -30,14 +28,18 @@ jobs:
with:
args: nextflow -log celltype-ref-run.log run build-celltype-ref.nf -stub -profile stub -ansi-log false

- name: Check Nextflow workflow for merging objects
uses: docker://nextflow/nextflow:21.10.6
with:
args: nextflow -log merge-run.log run merge.nf -stub -profile stub -ansi-log false --project STUBP01

- name: Join log files
if: ${{ !cancelled() }}
run: cat stub-run.log checkpoint-run.log celltype-ref-run.log > nextflow-runs.log
run: cat stub-run.log checkpoint-run.log celltype-ref-run.log merge-run.log > nextflow-runs.log

- name: Upload nextflow log
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v3
with:
name: nextflow-log
path: nextflow-runs.log

37 changes: 37 additions & 0 deletions bin/merge_sces.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ option_list <- list(
default = FALSE,
help = "Keep any altExp present in the merged object."
),
make_option(
opt_str = c("--multiplexed"),
action = "store_true",
default = FALSE,
help = "Indicates if the provided SCE's contain multiplexed data.
If so, the sample metadata will not be added to the colData."
),
make_option(
opt_str = c("-t", "--threads"),
type = "integer",
Expand Down Expand Up @@ -118,6 +125,36 @@ merged_sce <- scpcaTools::merge_sce_list(
include_altexp = opt$include_altexp
)

# sample metadata is only added to colData when the data are not multiplexed
if (!opt$multiplexed) {
  merged_sce <- scpcaTools::metadata_to_coldata(
    merged_sce,
    join_columns = "library_id"
  )

  # keep every metadata entry except the sample metadata
  merged_metadata <- metadata(merged_sce)
  is_sample_metadata <- names(merged_metadata) == "sample_metadata"
  metadata(merged_sce) <- merged_metadata[!is_sample_metadata]
}

# build a table with one row per library holding the technology version,
# assay ontology term (EFO), and sequencing unit stored in
# metadata$library_metadata
library_df <- names(input_sce_files) |>
  purrr::map(\(library_id) {
    lib_meta <- metadata(merged_sce) |>
      purrr::pluck("library_metadata", library_id)
    data.frame(
      library_id = library_id,
      tech_version = lib_meta$tech_version,
      assay_ontology_term_id = lib_meta$assay_ontology_term_id,
      seq_unit = lib_meta$seq_unit
    )
  }) |>
  dplyr::bind_rows()

# join tech and EFO with colData
# colData has one row per cell and library_df has one row per library, so many
# colData rows match each library row; "many-to-one" still errors if a library
# appears more than once in library_df, whereas "one-to-one" would error for
# any library that contributes more than one cell
# left_join() drops row names, so they are restored from the original colData
colData(merged_sce) <- colData(merged_sce) |>
  as.data.frame() |>
  dplyr::left_join(
    library_df,
    by = "library_id",
    relationship = "many-to-one"
  ) |>
  DataFrame(row.names = rownames(colData(merged_sce)))

# HVG selection ----------------------------------------------------------------

Expand Down
12 changes: 7 additions & 5 deletions bin/sce_to_anndata.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ format_czi <- function(sce) {
sce$library_id <- metadata(sce)$library_id
}

# add sample metadata to colData sce
sce <- scpcaTools::metadata_to_coldata(
sce,
join_columns = "library_id"
)
# if sample metadata is present, add to colData
if ("sample_metadata" %in% names(metadata(sce))) {
sce <- scpcaTools::metadata_to_coldata(
sce,
join_columns = "library_id"
)
}

# modify colData to be AnnData and CZI compliant
coldata_df <- colData(sce) |>
Expand Down
43 changes: 27 additions & 16 deletions merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ process merge_sce {
label 'mem_32'
publishDir "${params.results_dir}/merged/${merge_group_id}"
input:
tuple val(merge_group_id), val(has_adt), val(library_ids), path(scpca_nf_file)
tuple val(merge_group_id), val(has_adt), val(multiplexed), val(library_ids), path(scpca_nf_file)
output:
tuple val(merge_group_id), val(has_adt), path(merged_sce_file)
tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed)
script:
input_library_ids = library_ids.join(',')
input_sces = scpca_nf_file.join(',')
Expand All @@ -46,10 +46,11 @@ process merge_sce {
--output_sce_file "${merged_sce_file}" \
--n_hvg ${params.num_hvg} \
${has_adt ? "--include_altexp" : ''} \
${multiplexed ? "--multiplexed" : '' } \
--threads ${task.cpus}
"""
stub:
merged_sce_file = "${merge_group}_merged.rds"
merged_sce_file = "${merge_group_id}_merged.rds"
"""
touch ${merged_sce_file}
"""
Expand All @@ -59,15 +60,15 @@ process merge_sce {
// create merge report
process generate_merge_report {
container params.SCPCATOOLS_CONTAINER
publishDir "${params.results_dir}/merged/${merge_group}"
publishDir "${params.results_dir}/merged/${merge_group_id}"
label 'mem_16'
input:
tuple val(merge_group_id), val(has_adt), path(merged_sce_file)
tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed)
path(report_template)
output:
path(merge_report)
script:
merge_report = "${merge_group}_summary_report.html"
merge_report = "${merge_group_id}_summary_report.html"
"""
Rscript -e "rmarkdown::render( \
'${report_template}', \
Expand All @@ -87,15 +88,15 @@ process generate_merge_report {
process export_anndata{
container params.SCPCATOOLS_CONTAINER
label 'mem_32'
tag "${merge_group}"
publishDir "${params.results_dir}/merged/${merge_group}", mode: 'copy'
tag "${merge_group_id}"
publishDir "${params.results_dir}/merged/${merge_group_id}", mode: 'copy'
input:
tuple val(merge_group), val(has_adt), path(merged_sce_file)
tuple path(merged_sce_file), val(merge_group_id), val(has_adt)
output:
tuple val(merge_group), path("${merge_group}_merged_*.hdf5")
tuple val(merge_group_id), path("${merge_group_id}_merged_*.hdf5")
script:
rna_hdf5_file = "${merge_group}_merged_rna.hdf5"
feature_hdf5_file = "${merge_group}_merged_adt.hdf5"
rna_hdf5_file = "${merge_group_id}_merged_rna.hdf5"
feature_hdf5_file = "${merge_group_id}_merged_adt.hdf5"
"""
sce_to_anndata.R \
--input_sce_file ${merged_sce_file} \
Expand All @@ -108,8 +109,8 @@ process export_anndata{
${has_adt ? "move_counts_anndata.py --anndata_file ${feature_hdf5_file}" : ''}
"""
stub:
rna_hdf5_file = "${merge_group}_merged_rna.hdf5"
feature_hdf5_file = "${merge_group}_merged_adt.hdf5"
rna_hdf5_file = "${merge_group_id}_merged_rna.hdf5"
feature_hdf5_file = "${merge_group_id}_merged_adt.hdf5"
"""
touch ${rna_hdf5_file}
${has_adt ? "touch ${feature_hdf5_file}" : ''}
Expand All @@ -133,14 +134,19 @@ workflow {
.collect{it.scpca_project_id}
.unique()

multiplex_projects = libraries_ch
.filter{it.technology.startsWith('cellhash')}
.collect{it.scpca_project_id}
.unique()

grouped_libraries_ch = libraries_ch
// only include single-cell/single-nuclei which ensures we don't try to merge libraries from spatial or bulk data
.filter{it.seq_unit in ['cell', 'nucleus']}
// create tuple of [project id, library_id, processed_sce_file]
.map{[
it.scpca_project_id,
it.scpca_library_id,
"${params.results_dir}/${it.scpca_project_id}/${it.scpca_sample_id}/${it.scpca_library_id}_processed.rds"
file("${params.results_dir}/${it.scpca_project_id}/${it.scpca_sample_id}/${it.scpca_library_id}_processed.rds")
]}
// only include libraries that have been processed through scpca-nf
.filter{file(it[2]).exists()}
Expand All @@ -152,6 +158,7 @@ workflow {
.map{project_id, library_id_list, sce_file_list -> tuple(
project_id,
project_id in adt_projects, // determines if altExp should be included in the merged object
project_id in multiplex_projects, // determines if sample metadata should be added to colData and to skip anndata
library_id_list,
sce_file_list
)}
Expand All @@ -162,5 +169,9 @@ workflow {
generate_merge_report(merge_sce.out, file(merge_template))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You will need to update the generate_merge_report process to expect the extra value in the tuple, right?


// export merged objects to AnnData
export_anndata(merge_sce.out)
anndata_ch = merge_sce.out
  .filter{ !it[3] } // remove multiplexed samples before export
  // drop the multiplexed flag so each tuple matches the 3-element input of export_anndata;
  // the `take` channel operator is not used here because it limits the number of
  // items emitted by the channel rather than the number of elements in each tuple
  .map{ merged_sce_file, merge_group_id, has_adt, multiplexed -> tuple(merged_sce_file, merge_group_id, has_adt) }

export_anndata(anndata_ch)
}
Empty file.
Empty file.
1 change: 1 addition & 0 deletions test/stub-run-metadata.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ STUBR12 STUBL11 STUBS12,STUBS13 STUBP06 10Xv3.1 cell stub test/runs/STUBR12 NA N
STUBR13 STUBL11 STUBS12,STUBS13 STUBP06 cellhash_10Xv3 cell stub test/runs/STUBR13 test/references/barcodes/cellhash.stub.tsv 2[1-15] NA NA NA
STUBR14 STUBL12 STUBS14 STUBP07 10Xv3.1 cell stub test/runs/STUBR14 NA NA NA NA test/celltypes/STUBL12-submitter-celltypes.tsv
STUBR15 STUBL13 STUBS15 STUBP08 10Xv3.1 cell stub test/runs/STUBR15 NA NA NA NA NA
STUBR16 STUBL16 STUBS16 STUBP01 10Xv2 cell stub test/runs/STUBR16 NA NA NA NA NA