From 8142b1aa643f6ac3ff977e82dfa6f29fdcb97aeb Mon Sep 17 00:00:00 2001 From: Colin Gross Date: Wed, 27 Jun 2018 14:16:51 -0400 Subject: [PATCH 1/3] Separate inputs per report type. Modify CLI script to accomodate API change. --- R/main.r | 40 ++++++++++++++++++++++++++-------------- bin/gocc.sh | 46 +++++++++++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/R/main.r b/R/main.r index f0cc240..0208dbb 100644 --- a/R/main.r +++ b/R/main.r @@ -1,46 +1,58 @@ #' @title Main #' @description Build all reports for input data in input directory #' @note This is the entry point for the application. -#' @param input_dir path to directory that contains clc.csv or hbpc.csv +#' @param ... named input file paths: clc and hbpc are read; dementia is accepted but not yet used #' @param config_path path to configuration file #' @param output_dir path to output directory. Defaults to input_dir #' @return boolean TRUE indicates successful run. #' @export #' -main <- function(input_dir, config_path, output_dir = NULL){ +main <- function(config_path, output_dir = NULL, ...){ + cat("\nDEBUG\n") + cat(config_path) + cat("\nDEBUG\n") + cat(output_dir) + cat("\n") + + input_args <- list(...) 
+ # Ignore warnings options(warn = -1) - # Default to using same directory for input and output - if(is.null(output_dir)){ output_dir <- input_dir } + # Default to using working directory output + if(is.null(output_dir)){ output_dir <- getwd() } config <- read_config(config_path) - # Read in CLC and HBPC data frames + # Read in data; a missing (NULL) or empty ('') path leaves its data frame NULL cat("\n\n--- ReadingData\n") - clc_filename <- file.path(input_dir,"clc.csv") - hbpc_filename <- file.path(input_dir,"hbpc.csv") + clc_inpath <- input_args[['clc']] + df_clc <- if(!is.null(clc_inpath) && clc_inpath != ''){ + read_clc_data(clc_inpath) + } - df_hbpc <- read_hbpc_data(hbpc_filename) - df_clc <- read_clc_data(clc_filename) + hbpc_inpath <- input_args[['hbpc']] + df_hbpc <- if(!is.null(hbpc_inpath) && hbpc_inpath != ''){ + read_hbpc_data(hbpc_inpath) + } - # Process Data + # Process data if(!is.null(df_hbpc)){ hbpc_df_list <- process_data(df_hbpc, envir=GOCC$HBPC) + report_all(hbpc_df_list, GOCC$HBPC, config, output_dir) }else{ - cat(paste("\nHBPC input file", hbpc_filename, "not present or malformed.\n")) + cat(paste("\nHBPC input file", hbpc_inpath, "not present or malformed.\n")) hbpc_df_list <- list() } if(!is.null(df_clc)){ clc_df_list <- process_data(df_clc, envir=GOCC$CLC) + report_all(clc_df_list, GOCC$CLC, config, output_dir) }else{ - cat(paste("\nCLC input file", clc_filename, "not present or malformed.\n")) + cat(paste("\nCLC input file", clc_inpath, "not present or malformed.\n")) clc_df_list <- list() } - report_all(hbpc_df_list, GOCC$HBPC, config, output_dir) - report_all(clc_df_list, GOCC$CLC, config, output_dir) cat("\n\nEnd of Line\n\n") } \ No newline at end of file diff --git a/bin/gocc.sh b/bin/gocc.sh index 3ce2c7a..93fd58d 100755 --- a/bin/gocc.sh +++ b/bin/gocc.sh @@ -8,17 +8,18 @@ command -v Rscript 1> /dev/null 2>&1 || \ read -r -d '' USE_MSG <<'HEREDOC' Usage: gocc.sh -h - gocc.sh input_dir config_file - gocc.sh -i [path to input dir] -c [path to config] - gocc.sh --input [path to input 
dir] --config [path to config] + gocc.sh [options] + gocc.sh [options] config_file Input directory is expected to contain hbpc.csv and clc.csv files. Options: -h | --help print help and exit - -i | --input path to input directory -c | --config path to configuration file -o | --output path to output directory + --clc path to clc report input csv file + --hbpc path to hbpc report input csv file + --dementia (unimplimented) path to dementia report input csv file HEREDOC # Parse args @@ -29,10 +30,6 @@ while (( "$#" )); do echo "${USE_MSG}" exit 0 ;; - -i|--input) - INPUT_DIR=$2 - shift 2 - ;; -c|--config) CONFIG_FILE=$2 shift 2 @@ -41,6 +38,18 @@ while (( "$#" )); do OUTPUT_DIR=$2 shift 2 ;; + --clc) + CLC_INPUT=$2 + shift 2 + ;; + --hbpc) + HBPC_INPUT=$2 + shift 2 + ;; + --dementia) + DEMENTIA_INPUT=$2 + shift 2 + ;; --) # end argument parsing shift break @@ -56,23 +65,16 @@ while (( "$#" )); do esac done -# Sort out input directory and shift params if used -if [[ -z $INPUT_DIR ]]; then - if [[ ${PARAMS[0]} ]]; then - INPUT_DIR="${PARAMS[0]}" - PARAMS=("${PARAMS[@]:1}") - else - echo "Aborting: Input directory required." - exit 1 - fi +# Check for input options +if [[ -z ${CLC_INPUT} ]] && [[ -z ${HBPC_INPUT} ]] && [[ -z ${DEMENTIA_INPUT} ]]; then + echo "Aborting: Report input file required. See -h for input options." + exit 1 fi # Sort out config file. # Check 2nd param (unshifted case) then 1st param (shifted case) if [[ -z $CONFIG_FILE ]]; then - if [[ ${PARAMS[1]} ]]; then - CONFIG_FILE="${PARAMS[1]}" - elif [[ ${PARAMS[0]} ]]; then + if [[ ${PARAMS[0]} ]]; then CONFIG_FILE="${PARAMS[0]}" else echo "Aborting: Config file required." @@ -87,8 +89,10 @@ fi echo "Creating ${OUTPUT_DIR} if it doesn't exist" mkdir -p "${OUTPUT_DIR}" +INPUT_ARGS="clc='${CLC_INPUT}', hbpc='${HBPC_INPUT}', dementia='${DEMENTIA_INPUT}'" + echo "Running GoCC R Package." 
-EXPR="gocc::main('${INPUT_DIR}', '${CONFIG_FILE}', '${OUTPUT_DIR}')" +EXPR="gocc::main(config_path='${CONFIG_FILE}', output_dir='${OUTPUT_DIR}', ${INPUT_ARGS})" echo "${EXPR}" Rscript --vanilla --default-packages=gocc -e "${EXPR}" From c9d18d9a9ddddeb314e53fec71bfae6052540f65 Mon Sep 17 00:00:00 2001 From: Colin Gross Date: Wed, 27 Jun 2018 16:18:24 -0400 Subject: [PATCH 2/3] Add update section to readme instructions. --- readme.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index c63d887..648da55 100644 --- a/readme.md +++ b/readme.md @@ -12,12 +12,29 @@ For example, if `/home/joe/projects/gocc` is where the gocc source directory wil 1. Install gocc package dependencies ```console Rscript --vanilla -e 'install.packages("devtools", repos="http://cran.us.r-project.org")' - Rscript --vanilla -e 'devtools::install_deps(pkg="gocc", dependencies="Import")' + Rscript --vanilla -e 'devtools::install_deps(pkg="gocc", dependencies=TRUE)' ``` 1. Install gocc package from source directory ```console R CMD INSTALL --preclean --no-multiarch --with-keep.source gocc ``` +### Update +1. Navigate to gocc source directory + ```console + cd /path/to/gocc + ``` +1. Pull latest source code + ```console + git pull + ``` +1. Navigate one directory up and follow the final two steps from the installation instructions above. + ```console + cd .. + Rscript --vanilla -e 'install.packages("devtools", repos="http://cran.us.r-project.org")' + Rscript --vanilla -e 'devtools::install_deps(pkg="gocc", dependencies=TRUE)' + R CMD INSTALL --preclean --no-multiarch --with-keep.source gocc + ``` + ## Package Use The package comes with a command line utility script in `bin/gocc.sh` From f5ae3d687d18911845d0b987c3a41de47466f082 Mon Sep 17 00:00:00 2001 From: Colin Gross Date: Wed, 27 Jun 2018 16:21:43 -0400 Subject: [PATCH 3/3] Remove deprecated bin scripts. 
--- bin/build_reports.sh | 74 -------------------------------- bin/generate_example_data.sh | 76 --------------------------------- bin/install_required_packages.r | 62 --------------------------- 3 files changed, 212 deletions(-) delete mode 100755 bin/build_reports.sh delete mode 100755 bin/generate_example_data.sh delete mode 100644 bin/install_required_packages.r diff --git a/bin/build_reports.sh b/bin/build_reports.sh deleted file mode 100755 index 91b8452..0000000 --- a/bin/build_reports.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env bash - -# Run R scripts to build HBPC and CLC reports if data is available. - -# Check that this is being run from the project root directory, -get_script_dir(){ - SOURCE="${BASH_SOURCE[0]}" - # While $SOURCE is a symlink, resolve it - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - SOURCE="$( readlink "$SOURCE" )" - # If $SOURCE was a relative symlink, - # resolve it relative to the symlink base directory - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - echo "$DIR" -} - -# Assume project root is one directory up, and -# change to the project root directory. -SCRIPT_DIR=$(get_script_dir) -PROJ_DIR=$(dirname ${SCRIPT_DIR}) -cd ${PROJ_DIR} - -LOGFILE="${PROJ_DIR}/build/build.log" -DATE=$(date +%Y-%m-%d:%H:%M:%S) - - -echo "Started build log at ${PROJ_DIR}/build/build.log" - -# Clear out any existing intermediate data -echo "Starting Build: ${DATE}" > "${LOGFILE}" -echo "Removing any intermediate data" >> "${LOGFILE}" -find "${PROJ_DIR}" -type f -name '*.rdata' -exec rm {} + - -echo "Checking if required R packages are installed" | tee -a "${LOGFILE}" -Rscript "${PROJ_DIR}/lib/install_required_packages.r" 1>>"${LOGFILE}" 2>&1 -if [ $? -ne 0 ] -then - echo "Problem installing R packages." - echo "Verify required packages are installed and accessible." 
- exit 1 -fi - -echo "Checking that input data has expected headers" | tee -a "${LOGFILE}" -Rscript "${PROJ_DIR}/lib/check_input.r" 1>>"${LOGFILE}" 2>&1 -if [ $? -ne 0 ] -then - echo "Problem encountered verifying input data." - echo "Check that expected header and columns are present." - exit 1 -fi - -echo "Filtering the input data" | tee -a "${LOGFILE}" -Rscript "${PROJ_DIR}/lib/filter_input.r" 1>>"${LOGFILE}" 2>&1 -if [ $? -ne 0 ] -then - echo "Problem filtering input data." - echo "Check build log for more information." - exit 1 -fi - -echo "Calculating performance measures" | tee -a "${LOGFILE}" -Rscript "${PROJ_DIR}/lib/calc_perf_measures.r" 1>>"${LOGFILE}" 2>&1 - -echo "Building report figures and tex" | tee -a "${LOGFILE}" -Rscript "${PROJ_DIR}/lib/build_all_tex.r" 1>>"${LOGFILE}" 2>&1 - -echo "Finding tex reports and compiling to pdf" | tee -a "${LOGFILE}" -find ${PROJ_DIR}/build -name '*.tex' -execdir pdflatex {} \; 1>>"${LOGFILE}" 2>&1 - -echo "Done" 1>> "${LOGFILE}" -echo "Generated reports in ${PROJ_DIR}/build/reports" diff --git a/bin/generate_example_data.sh b/bin/generate_example_data.sh deleted file mode 100755 index df51537..0000000 --- a/bin/generate_example_data.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash - -# Discover project root directory regardless of symlinks -get_script_dir(){ - SOURCE="${BASH_SOURCE[0]}" - # While $SOURCE is a symlink, resolve it - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - SOURCE="$( readlink "$SOURCE" )" - # If $SOURCE was a relative symlink, - # resolve it relative to the symlink base directory - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - echo $DIR -} - -# Assume project root is one directory up, and -# change to the project root directory. 
-SCRIPT_DIR=$(get_script_dir) -PROJ_DIR=$(dirname ${SCRIPT_DIR}) - -echo "Project Directory: $(PROJ_DIR)" - -DATE=$(date +%Y-%m-%d_%H%M%S) -BACKUP_CFG="${PROJ_DIR}/config/report_settings.${DATE}.yml" -BACKUP_CLC_DATA="${PROJ_DIR}/input/clc.${DATE}.csv" -BACKUP_HBPC_DATA="${PROJ_DIR}/input/hpbc.${DATE}.csv" - -# Backup report_settings.yml if present -if [ -f "$(PROJ_DIR)/config/report_settings.yml" ] -then - echo "~~~" - echo "Backing up report settings to ${BACKUP_CFG}" - cp $(PROJ_DIR)/config/report_settings.yml ${BACKUP_CFG} -fi - -if [ ! -f "$(PROJ_DIR)/config/report_settings.yml.sample" ] -then - echo "!!!" - echo "Sample report settings missing!" - echo "!!!" - exit 1 -else - echo "~~~" - echo "Copying sample settings to config/report_settings.yml" - cp $(PROJ_DIR)/config/report_settings.yml.sample $(PROJ_DIR)/config/report_settings.yml -fi - -# Backup clc and hbpc data if present -if [ -f $(PROJ_DIR)/input/clc.csv ] -then - echo "~~~" - echo "Backing up clc.csv to ${BACKUP_CLC_DATA}" - cp $(PROJ_DIR)/input/clc.csv ${BACKUP_CLC_DATA} -fi - -if [ -f $(PROJ_DIR)/input/hbpc.csv ] -then - echo "~~~" - echo "Backing up hbpc.csv to ${BACKUP_HBPC_DATA}" - cp $(PROJ_DIR)/input/hbpc.csv ${BACKUP_HBPC_DATA} -fi - -# Generate sample data - echo "~~~" - echo "Generating synthetic clc.clv and hbpc.clc in input/" - -Rscript "${PROJ_DIR}/lib/synth_clc_data.r" -Rscript "${PROJ_DIR}/lib/synth_hbpc_data.r" - -# Print Running instructions - echo "~~~" - echo "Run build script:" - echo "bin/build_reports.sh" - echo "" diff --git a/bin/install_required_packages.r b/bin/install_required_packages.r deleted file mode 100644 index 4fbdba9..0000000 --- a/bin/install_required_packages.r +++ /dev/null @@ -1,62 +0,0 @@ -# Install packages required for generating reports - -if(require('ggplot2', quietly=TRUE) == FALSE){ - install.packages('ggplot2') -} else { - print('ggplot2 already installed.') -} - -if(require('ggthemes', quietly=TRUE) == FALSE){ - install.packages('ggthemes') -} else { 
- print('ggthemes already installed.') -} - -if(require('scales', quietly=TRUE) == FALSE){ - install.packages('scales') -} else { - print('scales already installed.') -} - -if(require('viridis', quietly=TRUE) == FALSE){ - install.packages('viridis') -} else { - print('viridis already installed.') -} - -if(require('tidyverse', quietly=TRUE) == FALSE){ - install.packages('tidyverse') -} else { - print('tidyverse already installed.') -} - -if(require('config', quietly=TRUE) == FALSE){ - install.packages('config') -} else { - print('config already installed.') -} - -if(require('stringr', quietly=TRUE) == FALSE){ - install.packages('stringr') -} else { - print('stringr already installed.') -} - -# knitr use here requires pandoc -if(require('knitr', quietly=TRUE) == FALSE){ - install.packages('knitr', dependencies = TRUE) -} else { - print('knitr already installed.') -} - -if(require('rmarkdown', quietly=TRUE) == FALSE){ - install.packages('rmarkdown') -} else { - print('rmarkdown already installed.') -} - -if(require('here', quietly=TRUE) == FALSE){ - install.packages('here') -} else { - print('here already installed.') -}