Skip to content

Commit

Permalink
Merge pull request #87 from uw-ssec/carlos/issue75
Browse files Browse the repository at this point in the history
feat: Support data transformation "verbs" in the docker container #77
  • Loading branch information
carlosgjs authored Feb 27, 2025
2 parents 65f4fa5 + 2609908 commit 043bd12
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 66 deletions.
7 changes: 5 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ docker run --rm \
-v /path/to/data-raw/:/home/biodiversity-horizons/data-raw/ \
-v /path/to/outputs/:/home/biodiversity-horizons/outputs/ \
biodiversity-horizons \
Rscript scripts/exposure_workflow.R /home/biodiversity-horizons/data-raw multicore 4
exposure \
-d /home/biodiversity-horizons/data-raw \
-p multicore \
-w 4
```

This command:
Expand All @@ -151,7 +154,7 @@ Example:
```bash
docker run --rm \
-v $(pwd)/data-raw/:/home/biodiversity-horizons/data-raw/ \
biodiversity-horizons -d ./data-raw
biodiversity-horizons exposure -d ./data-raw
```

## Pull Requests
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ Imports:
logger,
tidyverse,
optparse,
here
here,
stars
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
5 changes: 2 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,10 @@ COPY NAMESPACE .
# and the image builds much faster when changing code
RUN Rscript -e "remotes::install_local('.', dependencies=TRUE)" # Install dependencies
COPY R ./R
COPY utility ./utility
RUN Rscript -e "remotes::install_local('.', dependencies=TRUE)" # install package code

COPY scripts ./scripts

# Run the script with "data-raw/" as path since that is where, run_container.sh will mount the data
# We'll also use "multisession", and (availableCores()-1) workers as default
# The user can override by passing in arguments at runtime, e.g.: "multisession" 4
# Run the main script, which can take arguments to determine the workflow to run
ENTRYPOINT ["Rscript", "scripts/main.R"]
12 changes: 12 additions & 0 deletions docker_exposure.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Runs the "exposure" workflow inside the Docker container.
#
# Usage: docker_exposure.sh <data_dir> <output_dir> [extra-args...]
#   <data_dir>    host directory handed to run_with_mounts as the data dir
#   <output_dir>  host directory handed to run_with_mounts as the output dir
#   [extra-args]  forwarded after "exposure -d data-raw"
#
# run_with_mounts is defined in run_container.sh, which is sourced below.
source run_container.sh

if [ "$#" -lt 2 ]; then
  # Usage errors go to stderr so they do not mix with regular output.
  echo "usage: $0 <data_dir> <output_dir> <extra-args>" >&2
  exit 1
fi

DATA_DIR=$1
OUT_DIR=$2
shift 2

# Quote the directories and use "$@" so paths containing spaces or glob
# characters reach run_with_mounts as single, unmodified arguments.
run_with_mounts "$DATA_DIR" "$OUT_DIR" exposure -d data-raw "$@"
13 changes: 13 additions & 0 deletions docker_shp2rds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Runs the "shp2rds" conversion inside the Docker container.
#
# Usage: docker_shp2rds.sh <input_file> <output_file> [extra-args...]
#   <input_file>   host path of the input file; its directory is passed to
#                  run_with_mounts as the data dir
#   <output_file>  host path of the output file; its directory is passed to
#                  run_with_mounts as the output dir
#   [extra-args]   forwarded after the -i/-o options
#
# run_with_mounts is defined in run_container.sh, which is sourced below.
source run_container.sh

if [ "$#" -lt 2 ]; then
  # Usage errors go to stderr so they do not mix with regular output.
  echo "Usage: $0 <input_file> <output_file> <extra-args>" >&2
  exit 1
fi

INPUT_FILE=$1
OUT_FILE=$2
shift 2

# Quote every expansion so paths containing spaces or glob characters are
# not word-split before they reach dirname/basename and run_with_mounts.
run_with_mounts "$(dirname "$INPUT_FILE")" "$(dirname "$OUT_FILE")" \
  shp2rds -i "./data-raw/$(basename "$INPUT_FILE")" \
  -o "./outputs/$(basename "$OUT_FILE")" "$@"
45 changes: 21 additions & 24 deletions run_container.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,27 @@
#!/bin/bash
# This script runs the Docker container with the provided directories for input and output data
# CONTAINER=biodiv # for development, change this to the name of your local Docker container build
CONTAINER=ghcr.io/uw-ssec/biodiversity-horizons

# Check if two arguments are provided
if [ "$#" -lt 2 ]; then
echo "Usage: $0 <data dir> <output dir>"
exit 1
fi
function run_with_mounts() {

# Assign arguments to variables
DATA_DIR=$1
OUT_DIR=$2
shift 2
# run the container with the given directories mounted as volumes
# and then pass any additional arguments to the container
DATA_DIR=$1
OUT_DIR=$2
shift 2

# Check if directories exist
if [ ! -d "$DATA_DIR" ]; then
echo "Error: Directory $DATA_DIR does not exist."
exit 1
fi
# Check if directories exist
if [ ! -d "$DATA_DIR" ]; then
echo "Error: Directory $DATA_DIR does not exist."
exit 1
fi

if [ ! -d "$OUT_DIR" ]; then
echo "Warning: Directory $OUT_DIR does not exist, creating it ..."
mkdir -p "$OUT_DIR"
fi
if [ ! -d "$OUT_DIR" ]; then
echo "Warning: Directory $OUT_DIR does not exist, creating it ..."
mkdir -p "$OUT_DIR"
fi

# Run the Docker container with the provided directories mounted as volumes
# TODO: Replace with the published Docker image name
docker run -v "$DATA_DIR":/home/biodiversity-horizons/data-raw \
-v "$OUT_DIR":/home/biodiversity-horizons/outputs \
ghcr.io/uw-ssec/biodiversity-horizons $* # Pass any additional arguments to the Docker container
docker run -v "$DATA_DIR":/home/biodiversity-horizons/data-raw \
-v "$OUT_DIR":/home/biodiversity-horizons/outputs \
$CONTAINER $* # Pass any additional arguments to the Docker container
}
85 changes: 49 additions & 36 deletions scripts/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,39 @@ parse_extent <- function(extent) {
return(ext(extent))
}

safe_parse_opts <- function(opt_parser, args) {
  # Parse `args` with `opt_parser`; on a parsing error, report the error
  # message and ask the parser to display its help.
  #
  # opt_parser: the option parser object handed to parse_args().
  # args:       the arguments to parse.
  #
  # Returns whatever parse_args() returns on success, or FALSE when the
  # error handler runs to completion.
  # NOTE(review): the "--help" call may end the session before FALSE is
  # returned, depending on how optparse handles help requests -- confirm.
  on_parse_error <- function(err) {
    cat("Error parsing arguments:", err$message, "\n")
    parse_args(opt_parser, args = c("--help"))
    FALSE
  }

  parsed <- tryCatch(
    parse_args(opt_parser, args = args),
    error = on_parse_error
  )
  return(parsed)
}


run_tif2rds <- function(args) {
source("utility/format_conversion_util.R")
option_list <- list(
make_option(c("-i", "--input"), type = "character",
help = "Path to the input .tif file"),
make_option(c("-o", "--output"), type = "character",
help = "Path to save the output .rds file"),
make_option(c("-y", "--year_range"), type = "character",
default = "1850:2014",
help = "Year range as a sequence (e.g., '1850:2014')")
make_option(c("-i", "--input"),
type = "character",
help = "Path to the input .tif file"
),
make_option(c("-o", "--output"),
type = "character",
help = "Path to save the output .rds file"
),
make_option(c("-y", "--year_range"),
type = "character",
default = "1850:2014",
help = "Year range as a sequence (e.g., '1850:2014')"
)
)
opt <- safe_parse_opts(OptionParser(option_list = option_list), args[-1])
check_not_null(opt$input, "input")
Expand All @@ -39,11 +61,12 @@ run_tif2rds <- function(args) {
cat("Output:", opt$output, "\n")
cat("Year range:", opt$year_range, "\n")

climate_data <- prepare_climate_data_from_tif(input_file = opt$input,
output_file = opt$output,
year_range = year_range)
climate_data <- prepare_climate_data_from_tif(
input_file = opt$input,
output_file = opt$output,
year_range = year_range
)
print("File converted successfully!")

}


Expand Down Expand Up @@ -86,7 +109,7 @@ run_shp2rds <- function(args) {
make_option(c("-w", "--workers"),
type = "numeric",
help = "Number of workers to use. Default is availableCores()-1.",
default = availableCores()-1
default = availableCores() - 1
)
)

Expand All @@ -107,33 +130,22 @@ run_shp2rds <- function(args) {
cat("Parallel:", opt$parallel, "\n")
cat("Workers:", opt$workers, "\n")

grid <- create_grid(extent_vals = extent,
resolution = opt$resolution,
crs = opt$crs)
range_data <- prepare_range_data_from_shp_file(input_file_path = opt$input,
grid = grid,
realm = opt$realm,
use_parallel = opt$parallel,
number_of_workers = opt$workers,
rds_output_file_path = opt$output)
grid <- create_grid(
extent_vals = extent,
resolution = opt$resolution,
crs = opt$crs
)
range_data <- prepare_range_data_from_shp_file(
input_file_path = opt$input,
grid = grid,
realm = opt$realm,
use_parallel = opt$parallel,
number_of_workers = opt$workers,
rds_output_file_path = opt$output
)
print("File converted successfully!")
}

safe_parse_opts <- function(opt_parser, args) {
  # Safely parse the options. Shows the help if there's an error.
  #
  # opt_parser: the option parser object handed to parse_args().
  # args:       the arguments to parse.
  #
  # Returns the result of parse_args() on success, or FALSE when the error
  # handler runs to completion.
  # NOTE(review): the "--help" call may end the session before FALSE is
  # returned, depending on how optparse handles help requests -- confirm.
  tryCatch(
    parse_args(opt_parser, args = args),
    error = function(e) {
      cat("Error parsing arguments:", conditionMessage(e), "\n")
      # Called only to display the help; its result is not needed, so it is
      # no longer stored in an unused local.
      parse_args(opt_parser, args = c("--help"))
      FALSE
    }
  )
}

run_exposure <- function(args) {
source("scripts/exposure_workflow.R")
Expand Down Expand Up @@ -164,6 +176,7 @@ run_exposure <- function(args) {
exposure_time_workflow(opt$data_path, opt$plan_type, opt$workers)
}

# Main function
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
stop("No command provided. Use 'shp2rds', 'tif2rds' or 'exposure'.")
Expand Down

0 comments on commit 043bd12

Please sign in to comment.