Skip to content

Commit

Permalink
Make complementSumstats.R write file output by default (#159)
Browse files Browse the repository at this point in the history
* minor update

* updated changelog

* rename_columns function

* Reorganized test run (#160)

* set --file-output w. nargs=1 as last arg

* test rename_columns function

---------

Co-authored-by: Andreas Jangmo <[email protected]>
  • Loading branch information
espenhgn and deepchocolate authored Apr 25, 2023
1 parent f7329b8 commit eea44e4
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ If MD5 sum is not listed for a certain release then it means that the container
### Fixed
- Made ``usecases/LDpred2/complementSumstats.R`` write output file by default, not stdout.
- Fixed print statement in ``usecases/LDpred2/complementSumstats.R`` causing crash w. ``--file-output`` arg.
- Fixed ``ldpred2.R`` script in case ``--file-pheno``/``--col-pheno``/``--col-pheno-from-fam`` args were used, by removing these options altogether.
- Use [packagemanager.rstudio.com/cran/__linux__/focal/2023-02-16](https://packagemanager.rstudio.com/cran/__linux__/focal/2023-02-16) as main R package repo
Expand Down
3 changes: 2 additions & 1 deletion usecases/LDpred2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ if [ ! -d $dirOutput ]; then mkdir $dirOutput; fi;
for fileSumstats in `ls $dirSumstats`; do
echo "Processing file $fileSumstats"
$RSCRIPT $COMORMENT/containers/LDpred2/complementSumstats.R --col-sumstats-snp-id MarkerName --sumstats $dirSumstats/$fileSumstats | gzip -c > $dirOutput/$fileSumstats
$RSCRIPT $COMORMENT/containers/LDpred2/complementSumstats.R --col-sumstats-snp-id MarkerName --sumstats $dirSumstats/$fileSumstats --file-output $dirOutput/$fileSumstats
gzip $dirOutput/$fileSumstats
done
```

Expand Down
18 changes: 13 additions & 5 deletions usecases/LDpred2/complementSumstats.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
library(argparser, quietly = T)
library(argparser, quietly=T)
library(dplyr, quietly=T)
# Source functions
coms <- commandArgs()
coms <- coms[substr(coms, 1, 7) == '--file=']
Expand All @@ -11,9 +12,10 @@ par <- add_argument(par, '--reference', nargs=1, default='/REF/hrc/HRC.r1-1.GRCh
help='Reference data file to complement summary statistics with. Anything accepted by bigreadr::fread2')
par <- add_argument(par, '--col-sumstats-snp-id', nargs=1, default='SNP', help='SNP ID (RSID) column in sumstats')
par <- add_argument(par, '--col-reference-snp-id', nargs=1, default='ID', help='SNP ID (RSID) column in reference file')
par <- add_argument(par, '--columns-append', nargs=Inf, help='Columns from reference data to append to sumstats. Defaults to #CHROM and POS')
par <- add_argument(par, '--file-output', nargs=1, default='', help='Write output to file. Defaults to stdout')
par <- add_argument(par, '--columns-append', nargs=Inf, default=c('#CHROM', 'POS'), help='Columns from reference data to append to sumstats. Defaults to #CHROM and POS')
par <- add_argument(par, '--column-names-output', nargs=Inf, default=c('CHR', 'POS'), help='Column names in output. Defaults to column names in sumstats. Defaults to CHR and POS')
par <- add_argument(par, '--file-output-col-sep', nargs=1, default='\t', help='Column separator in output. Anything accepted by bigreadr::fwrite2')
par <- add_argument(par, '--file-output', nargs=1, default=tempfile(), help='Output file name. Defaults to a temporary file (<tempdir>/<tempfilename>)')
parsed <- parse_args(par)
fileSumstats <- parsed$sumstats
reference <- parsed$reference
Expand All @@ -23,13 +25,19 @@ noPrint <- ifelse(fileOut == '', T, F)
colSumstatsSnpId <- parsed$col_sumstats_snp_id
colRefSnpId <- parsed$col_reference_snp_id
colsAppend <- parsed$columns_append
if (isVarNA(colsAppend)) colsAppend <- c('#CHROM', 'POS')

# assert that length of columns-append and column-names-output are the same
if (length(colsAppend) != length(parsed$column_names_output)) {
stop('Length of columns-append and column-names-output must be the same')
}

if (!noPrint) cat('Reading sumstats', fileSumstats, '\n')
sumstats <- bigreadr::fread2(fileSumstats)
if (!noPrint) cat('Reading reference file', reference, '\n')
reference <- bigreadr::fread2(reference)
if (!noPrint) cat('Complementing sumstats\n')
sumstats <- complementSumstats(sumstats, reference, colRsidSumstats=colSumstatsSnpId, colRsidRef=colRefSnpId, colsKeepReference=colsAppend)
if (!noPrint) cat('Writing output', fileOut, '\n')
if (!noPrint) cat('Renaming columns\n')
sumstats <- rename_columns(sumstats, colsAppend, parsed$column_names_output)
if (!noPrint) cat('Writing output file', fileOut, '\n')
bigreadr::fwrite2(sumstats, file=fileOut, sep=fileOutColSep)
15 changes: 14 additions & 1 deletion usecases/LDpred2/fun.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,17 @@ complementSumstats <- function(sumstats, reference, colRsidSumstats='SNP', colRs
res <- data.table::merge.data.table(sumstats, reference[,colsRef], by.x=colRsidSumstats, by.y=colRsidRef, all.x=T)
if (nrow(res) != nrRows) warning('The merge resulted in ', nrow(res), ' rows but input contained ', nrRows, ' rows')
res
}
}

# Rename columns in data.frame
#' @param df A data.frame
#' @param old_names A vector of old column names
#' @param new_names A vector of new column names
#' @return A data.frame with renamed columns
rename_columns <- function(df, old_names, new_names) {
if (length(old_names) != length(new_names)) {
stop("The length of the old_names and new_names lists must be the same.")
}
colnames(df)[match(old_names, colnames(df))] <- new_names
return(df)
}
6 changes: 3 additions & 3 deletions usecases/LDpred2_test/scripts/sumstats.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
fileSumstats=$DIR_TESTS/unittest/data/sumstats.txt
fileReference=$DIR_TESTS/unittest/data/hrc37.txt
# The tests are not run using the full HRC data to speed things up
COM="$RSCRIPT $DIR_SCRIPTS/complementSumstats.R --sumstats $fileSumstats --reference $fileReference"
COM="$RSCRIPT $DIR_SCRIPTS/complementSumstats.R --reference $fileReference"
# Output to stdout
dump=$( { $COM; } 2>&1 )
dump=$( { $COM --sumstats $fileSumstats; } 2>&1 )
if [ $? -eq 1 ]; then echo "$dump"; exit; fi
# Add up som more columns than the defaults
dump=$( { $COM --columns-append "#CHROM" POS AF; } 2>&1 )
dump=$( { $COM --columns-append "#CHROM" POS AF --sumstats $fileSumstats ; } 2>&1 )
if [ $? -eq 1 ]; then echo "$dump"; exit; fi
8 changes: 8 additions & 0 deletions usecases/LDpred2_test/unittest/fun.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,11 @@ test_that("Test functions used when merging score to existing files", {
expect_warning(verifyScoreOutputFile(fileSumstats, 'OR', c('CHR_ORG', 'SNP')))
expect_no_error(verifyScoreOutputFile(fileSumstats, 'score', c('CHR_ORG', 'SNP')))
})

# Test that rename_columns works as expected
test_that("Test rename_columns", {
expect_equal(rename_columns(sumstats, c('A1', 'A2'), c('A1', 'A2')), sumstats)
expect_true(hasAllColumns(rename_columns(sumstats, c('A1', 'A2', 'N'), c('EffectAllele', 'OtherAllele', 'Neff')), c('EffectAllele', 'OtherAllele', 'Neff')))
expect_error(rename_columns(sumstats, c('A1', 'A2'), c('A1', 'A2', 'N')))
expect_error(rename_columns(sumstats, c('A1', 'A2', 'N'), c('A1', 'A2')))
})

0 comments on commit eea44e4

Please sign in to comment.