Snakemake script rather than Rscript

AU-ENVS-Bioinformatics · Apr 23, 2024 · 54794ab · 54794ab
1 parent 2bb9386
commit 54794ab
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 27 deletions.
diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -37,8 +37,8 @@ rule combine_outputs_split:
         "logs/emu/combined_split.log",
     conda:
         "envs/dplyr.yaml"
-    shell:
-        "Rscript --vanilla workflow/scripts/combine_outputs.R {input} {output} 2>&1 | tee {log}"
+    script:
+        "scripts/combine_outputs.R"
 
 rule abundance:
     input:

diff --git a/workflow/scripts/combine_outputs.R b/workflow/scripts/combine_outputs.R
@@ -1,30 +1,34 @@
 # Get list of files from command line
 library(dplyr)
-args <- commandArgs(trailingOnly = TRUE)
-infiles <- args[1:length(args)-1]
-outfile <- args[length(args)]
-# Read all TSV files into dataframes and combine them
-samples_names <- gsub(".tsv", "", basename(infiles))
-process_file <- function(infile) {
-    sample_name <- gsub(".tsv", "", basename(infile))
-    df <- infile %>%
-        read.table(, header = TRUE, sep = "\t", stringsAsFactors = FALSE) %>% 
-        select(-abundance)
-    # Replace estimated.counts with sample name
-    colnames(df)[grep("estimated.counts", colnames(df))] <- sample_name
-    df
-}
-dfs <- lapply(infiles, process_file)
+run <- function(infiles, outfile){
+    # Read all TSV files into dataframes and combine them
+    samples_names <- gsub(".tsv", "", basename(infiles))
+    process_file <- function(infile) {
+        sample_name <- gsub(".tsv", "", basename(infile))
+        df <- infile %>%
+            read.table(, header = TRUE, sep = "\t", stringsAsFactors = FALSE) %>% 
+            select(-abundance)
+        # Replace estimated.counts with sample name
+        colnames(df)[grep("estimated.counts", colnames(df))] <- sample_name
+        df
+    }
+    dfs <- lapply(infiles, process_file)
 
 
-#  Combine all dataframes into one using left_join with all columns that are not samples_names as keys
-taxa_cols <- setdiff(colnames(dfs[[1]]), samples_names)
-combined_df <- do.call(\(x, y) dplyr::full_join(x, y, by = taxa_cols), dfs)
-# Replace NA with 0 for every column in samples_names
-replace_na <- function(x) {
-    x[is.na(x)] <- 0
-    x
+    # Combine all dataframes into one using full_join, keyed on every column that is not in samples_names
+    taxa_cols <- setdiff(colnames(dfs[[1]]), samples_names)
+    combined_df <- do.call(\(x, y) dplyr::full_join(x, y, by = taxa_cols), dfs)
+    # Replace NA with 0 for every column in samples_names
+    replace_na <- function(x) {
+        x[is.na(x)] <- 0
+        x
+    }
+    combined_df[samples_names] <- lapply(combined_df[samples_names], replace_na)
+    # Write combined dataframe to file
+    write.table(combined_df, outfile, sep = "\t", row.names = FALSE)
 }
-combined_df[samples_names] <- lapply(combined_df[samples_names], replace_na)
-# Write combined dataframe to file
-write.table(combined_df, outfile, sep = "\t", row.names = FALSE)
+
+run(
+  as.character(snakemake@input), 
+  snakemake@output[[1]]
+)