diff --git a/.github/workflows/tests-r-via-nix.yaml b/.github/workflows/tests-r-via-nix.yaml
index 5a4c0d23..6a283c91 100644
--- a/.github/workflows/tests-r-via-nix.yaml
+++ b/.github/workflows/tests-r-via-nix.yaml
@@ -23,6 +23,11 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
+      - name: Create .Renviron
+        run: |
+          echo "GITHUB_PAT=${{ secrets.GITHUB_TOKEN }}" >> ~/.Renviron
+        shell: bash
+
       - uses: cachix/install-nix-action@v25
         with:
           nix_path: nixpkgs=https://github.com/rstats-on-nix/nixpkgs/archive/refs/heads/r-daily.tar.gz
diff --git a/NAMESPACE b/NAMESPACE
index e8f04a14..1c347ce7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,8 +13,10 @@ export(tar_nix_ga)
 export(with_nix)
 importFrom(codetools,checkUsage)
 importFrom(codetools,findGlobals)
+importFrom(curl,curl_fetch_disk)
 importFrom(curl,curl_fetch_memory)
 importFrom(curl,handle_reset)
+importFrom(curl,handle_setheaders)
 importFrom(curl,has_internet)
 importFrom(curl,new_handle)
 importFrom(jsonlite,fromJSON)
diff --git a/R/fetchers.R b/R/fetchers.R
index d5eba7bf..0f5fd90b 100644
--- a/R/fetchers.R
+++ b/R/fetchers.R
@@ -166,10 +166,11 @@ remove_base <- function(list_imports) {
 
 #' Finds dependencies of a package from the DESCRIPTION file
 #' @param path path to package
+#' @param commit_date Date of the commit, used to resolve unpinned remote dependencies
 #' @importFrom utils untar
 #' @return Atomic vector of packages
 #' @noRd
-get_imports <- function(path) {
+get_imports <- function(path, commit_date) {
   tmpdir <- tempdir()
 
   on.exit(unlink(tmpdir, recursive = TRUE, force = TRUE), add = TRUE)
@@ -224,17 +225,10 @@ get_imports <- function(path) {
 
   remote_pkgs_names <- remote_pkgs_names_and_refs |>
     sapply(function(x) x[[1]])
-
-  # Check if we have a list of lists of two elements: a package name
-  # and a ref. If not, add "HEAD" to it.
-  remote_pkgs_refs <- lapply(remote_pkgs_names_and_refs, function(sublist) {
-    if (length(sublist) == 1) {
-      c(sublist, "HEAD")
-    } else {
-      sublist
-    }
-  }) |>
-    sapply(function(x) x[[2]])
+  # Try to get a commit hash for each package if one is not already provided
+  remote_pkgs_refs <- lapply(remote_pkgs_names_and_refs, function(x) {
+    resolve_package_commit(x, commit_date, remotes)
+  })
 
   urls <- paste0(
     "https://github.com/",
@@ -431,3 +425,164 @@ get_remote <- function(git_pkg) {
   remote_package_names <- sapply(remotes, `[[`, "package_name")
   return(remote_package_names)
 }
+
+#' get_commit_date Retrieves the date of a commit from a GitHub repository
+#' @param repo The GitHub repository (e.g. "r-lib/usethis")
+#' @param commit_sha The commit hash of interest
+#' @return A character. The date of the commit.
+#' @importFrom curl new_handle handle_setheaders curl_fetch_memory
+#' @importFrom jsonlite fromJSON
+#' @noRd
+get_commit_date <- function(repo, commit_sha) {
+  url <- paste0("https://api.github.com/repos/", repo, "/commits/", commit_sha)
+  h <- new_handle()
+
+  token <- Sys.getenv("GITHUB_PAT")
+  token_pattern <- "^(gh[ps]_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59})$"
+
+  if (grepl(token_pattern, token)) {
+    handle_setheaders(h, Authorization = paste("token", token))
+  } else {
+    message("No GitHub Personal Access Token found. Please set GITHUB_PAT in your environment. Falling back to unauthenticated API request.")
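+    # NOTE: unauthenticated GitHub API requests are rate-limited (60 per hour)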
+  }
+
+  tryCatch({
+    response <- curl_fetch_memory(url, handle = h)
+    if (response$status_code != 200) {
+      stop("API request failed with status code: ", response$status_code)
+    }
+    commit_data <- fromJSON(rawToChar(response$content))
+    if (is.null(commit_data$commit$committer$date)) {
+      stop("Invalid response format: missing commit date")
+    }
+    commit_data$commit$committer$date
+  }, error = function(e) {
+    stop("Failed to get commit date for ", commit_sha, ": ", e$message)
+  })
+}
+
+#' download_all_commits Downloads commits (maximum 1000) from a GitHub repository
+#' @param repo The GitHub repository (e.g. "r-lib/usethis")
+#' @param date The target date to find the closest commit
+#' @return A data frame with commit SHAs and dates
+#' @importFrom curl new_handle handle_setheaders curl_fetch_memory
+#' @importFrom jsonlite fromJSON
+#' @noRd
+download_all_commits <- function(repo, date) {
+  base_url <- paste0("https://api.github.com/repos/", repo, "/commits")
+  h <- new_handle()
+
+  token <- Sys.getenv("GITHUB_PAT")
+  token_pattern <- "^(gh[ps]_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59})$"
+
+  if (grepl(token_pattern, token)) {
+    handle_setheaders(h, Authorization = paste("token", token))
+  } else {
+    message("No GitHub Personal Access Token found. Please set GITHUB_PAT in your environment. Falling back to unauthenticated API request.")
+  }
+
+  # Limit to 10 pages of 100 commits each, so 1000 commits in total
+  per_page <- 100
+  max_pages <- 10
+  max_commits <- per_page * max_pages
+
+  # Pre-allocate the results data frame
+  all_commits <- data.frame(
+    sha = character(max_commits),
+    date = as.POSIXct(rep(NA, max_commits))
+  )
+  commit_count <- 0
+
+  for (page in 1:max_pages) {
+    url <- paste0(base_url, "?per_page=", per_page, "&page=", page)
+
+    tryCatch(
+      {
+        response <- curl_fetch_memory(url, handle = h)
+        if (response$status_code != 200) {
+          stop("API request failed with status code: ", response$status_code)
+        }
+
+        commits <- fromJSON(rawToChar(response$content))
+
+        # If no commits are found, break the loop
+        if (!is.list(commits) || length(commits) == 0) break
+        n_commits <- length(commits$sha)
+        if (n_commits == 0) break
+
+        idx <- (commit_count + 1):(commit_count + n_commits)
+        all_commits$sha[idx] <- commits$sha
+        all_commits$date[idx] <- as.POSIXct(
+          commits$commit$committer$date,
+          format = "%Y-%m-%dT%H:%M:%OSZ"
+        )
+
+        commit_count <- commit_count + n_commits
+
+        # If the oldest commit fetched so far predates the target date,
+        # the remaining pages are older still, so stop paging
+        if (min(all_commits$date, na.rm = TRUE) <
+          as.POSIXct(date, format = "%Y-%m-%dT%H:%M:%OSZ")) {
+          break
+        }
+      },
+      error = function(e) {
+        stop("Failed to download commit data: ", e$message)
+      }
+    )
+  }
+
+  # Return only the rows with actual data
+  all_commits[1:commit_count, ]
+}
+
+#' get_closest_commit Finds the closest commit to a specific date
+#' @param commits_df A data frame with commit SHAs and dates
+#' @param target_date The target date to find the closest commit
+#' @return A data frame with the closest commit SHA and date
+#' @noRd
+get_closest_commit <- function(commits_df, target_date) {
+  # Convert target_date to POSIXct format
+  target_date <- as.POSIXct(target_date, format = "%Y-%m-%dT%H:%M:%OSZ")
+
+  # Filter commits before or on the target date
+  filtered_commits <- commits_df[commits_df$date <= target_date, ]
+
+  # If no commits are found, raise an error
+  if (nrow(filtered_commits) == 0) {
+    stop("No commits found before or on the target date.")
+  }
+
+  # Find the closest commit by selecting the maximum date
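+  # (which.max() returns a single row even when several commits share that date)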
+  closest_commit <- filtered_commits[which.max(filtered_commits$date), ]
+  return(closest_commit)
+}
+
+#' resolve_package_commit Resolves the commit SHA for a package based on a date
+#' @param remote_pkg_name_and_ref A list containing the package name and optionally a ref
+#' @param date The target date to find the closest commit
+#' @param remotes A character vector of remotes
+#' @return A character. The commit SHA of the closest commit to the target date, or "HEAD" if the API call fails
+#' @noRd
+resolve_package_commit <- function(remote_pkg_name_and_ref, date, remotes) {
+  # Check if remote is a list with a package name and a ref
+  if (length(remote_pkg_name_and_ref) == 2) {
+    # Keep existing ref if present
+    return(remote_pkg_name_and_ref[[2]])
+  } else if (length(remote_pkg_name_and_ref) == 1) {
+    # For packages without a ref, try to find the closest commit by date;
+    # fall back to HEAD if the API fails
+    result <- tryCatch({
+      remotes_fetch <- remotes[grepl(remote_pkg_name_and_ref, remotes)]
+      all_commits <- download_all_commits(remotes_fetch, date)
+      closest_commit <- get_closest_commit(all_commits, date)
+      closest_commit$sha
+    },
+    error = function(e) {
+      message(paste0("Failed to get commit for ", remote_pkg_name_and_ref,
+        ": ", e$message, "\nFalling back to HEAD"))
+      return("HEAD")
+    })
+    return(result)
+  } else {
+    stop("remote_pkg_name_and_ref must be a list of length 1 or 2")
+  }
+}
diff --git a/R/nix_hash.R b/R/nix_hash.R
index 60fd1b19..1df9aeab 100644
--- a/R/nix_hash.R
+++ b/R/nix_hash.R
@@ -21,7 +21,6 @@ nix_hash <- function(repo_url, commit) {
   }
 }
 
-
 #' Return the SRI hash of an URL with .tar.gz
 #' @param url String with URL ending with `.tar.gz`
 #' @return list with following elements:
@@ -94,7 +93,25 @@ hash_url <- function(url) {
   paths <- list.files(path_to_src, full.names = TRUE, recursive = TRUE)
   desc_path <- grep(file.path(list.files(path_to_src), "DESCRIPTION"),
     paths,
    value = TRUE
  )
-  deps <- get_imports(desc_path)
+  if (grepl("github", url)) {
+    repo_url_short <- paste(unlist(strsplit(url, "/"))[4:5], collapse = "/")
+    commit <- gsub(x = basename(url), pattern = ".tar.gz", replacement = "")
+    commit_date <- tryCatch(
+      {
+        get_commit_date(repo_url_short, commit)
+      },
+      error = function(e) {
+        message(paste0(
+          "Failed to get commit date for ", commit, ": ", e$message,
+          "\nFalling back to today"
+        ))
+        return(Sys.Date())
+      }
+    )
+  } else {
+    # Not a GitHub URL: there is no commit to date, so fall back to today
+    commit_date <- Sys.Date()
+  }
+
+  deps <- get_imports(desc_path, commit_date)
 
   return(
     list(
diff --git a/tests/testthat/_snaps/renv_helpers/default_datathin.nix b/tests/testthat/_snaps/renv_helpers/default_datathin.nix
index c67774fb..c67c1664 100644
--- a/tests/testthat/_snaps/renv_helpers/default_datathin.nix
+++ b/tests/testthat/_snaps/renv_helpers/default_datathin.nix
@@ -347,7 +347,7 @@ let
     name = "datathin";
     src = pkgs.fetchgit {
       url = "https://github.com/anna-neufeld/datathin";
-      rev = "HEAD";
+      rev = "58eb154609365fa7301ea0fa397fbf04dd8c28ed";
       sha256 = "sha256-rtRpwFI+JggX8SwnfH4SPDaMPK2yLhJFTgzvWT+Zll4=";
     };
     propagatedBuildInputs = builtins.attrValues {
diff --git a/tests/testthat/test-fetchers.R b/tests/testthat/test-fetchers.R
index 6565e985..6fcddde3 100644
--- a/tests/testthat/test-fetchers.R
+++ b/tests/testthat/test-fetchers.R
@@ -132,6 +132,135 @@ testthat::test_that("Test fetchgit gets a package with several remote deps and c
       commit = "ee5505c817b19b59d37236ed35a81a65aa376124"
     )
   ),
-    "\n httr2 = (pkgs.rPackages.buildRPackage {\n name = \"httr2\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/httr2\";\n rev = \"HEAD\";\n sha256 = \"sha256-ny4J2WqUL4LPLWRKS8rgVqwvgMOQ2Rm/lbBWtF+99PE=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n cli\n curl\n glue\n lifecycle\n magrittr\n openssl\n R6\n rappdirs\n rlang\n vctrs\n withr;\n };\n });\n\n gh = (pkgs.rPackages.buildRPackage {\n name = \"gh\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/gh\";\n rev = \"HEAD\";\n sha256 = \"sha256-POXEMZv8kqHokAxK8LoWkS0qYrcIcVdQi5xyGD992KU=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n cli\n gitcreds\n glue\n ini\n jsonlite\n lifecycle\n rlang;\n } ++ [ httr2 ];\n });\n\n\n highlite = (pkgs.rPackages.buildRPackage {\n name = \"highlite\";\n src = pkgs.fetchgit {\n url = \"https://github.com/jimhester/highlite\";\n rev = \"HEAD\";\n sha256 = \"sha256-lkWMlAi75MYxiBUYnLwxLK9ApXkWanA4Mt7g4qtLpxM=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n Rcpp\n BH;\n };\n });\n\n\n memoise = (pkgs.rPackages.buildRPackage {\n name = \"memoise\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/memoise\";\n rev = \"74d62c8\";\n sha256 = \"sha256-fsdop66VglkOIYrJ0LKZKikIZmzQ2gqEATLy9tTJ/B8=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n digest;\n };\n });\n\n lookup = (pkgs.rPackages.buildRPackage {\n name = \"lookup\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/lookup/\";\n rev = \"ee5505c817b19b59d37236ed35a81a65aa376124\";\n sha256 = \"sha256-jiSBuC1vzJbN6OckgVX0E+XuMCeZS5LKsldIVL7DNgo=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n Rcpp\n codetools\n crayon\n rex\n jsonlite\n rstudioapi\n withr\n httr;\n } ++ [ highlite gh memoise ];\n });\n"
+    "\n httr2 = (pkgs.rPackages.buildRPackage {\n name = \"httr2\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/httr2\";\n rev = \"15243331d3f6192e1a2c210b2959d6fec63402c4\";\n sha256 = \"sha256-ny4J2WqUL4LPLWRKS8rgVqwvgMOQ2Rm/lbBWtF+99PE=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n cli\n curl\n glue\n lifecycle\n magrittr\n openssl\n R6\n rappdirs\n rlang\n vctrs\n withr;\n };\n });\n\n gh = (pkgs.rPackages.buildRPackage {\n name = \"gh\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/gh\";\n rev = \"HEAD\";\n sha256 = \"sha256-POXEMZv8kqHokAxK8LoWkS0qYrcIcVdQi5xyGD992KU=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n cli\n gitcreds\n glue\n ini\n jsonlite\n lifecycle\n rlang;\n } ++ [ httr2 ];\n });\n\n\n highlite = (pkgs.rPackages.buildRPackage {\n name = \"highlite\";\n src = pkgs.fetchgit {\n url = \"https://github.com/jimhester/highlite\";\n rev = \"767b122ef47a60a01e1707e4093cf3635a99c86b\";\n sha256 = \"sha256-lkWMlAi75MYxiBUYnLwxLK9ApXkWanA4Mt7g4qtLpxM=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n Rcpp\n BH;\n };\n });\n\n\n memoise = (pkgs.rPackages.buildRPackage {\n name = \"memoise\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/memoise\";\n rev = \"74d62c8\";\n sha256 = \"sha256-fsdop66VglkOIYrJ0LKZKikIZmzQ2gqEATLy9tTJ/B8=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n digest;\n };\n });\n\n lookup = (pkgs.rPackages.buildRPackage {\n name = \"lookup\";\n src = pkgs.fetchgit {\n url = \"https://github.com/b-rodrigues/lookup/\";\n rev = \"ee5505c817b19b59d37236ed35a81a65aa376124\";\n sha256 = \"sha256-jiSBuC1vzJbN6OckgVX0E+XuMCeZS5LKsldIVL7DNgo=\";\n };\n propagatedBuildInputs = builtins.attrValues {\n inherit (pkgs.rPackages) \n Rcpp\n codetools\n crayon\n rex\n jsonlite\n rstudioapi\n withr\n httr;\n } ++ [ highlite gh memoise ];\n });\n"
   )
 })
+
+testthat::test_that("get_commit_date works with valid repo and commit", {
+  testthat::skip_on_cran()
+  date <- get_commit_date(
+    "ropensci/rix",
+    "cd7a53f7c670bd5106a94b48573d5f824174170f"
+  )
+  testthat::expect_match(date, "2025-01-10T07:05:02Z")
+})
+
+testthat::test_that("get_commit_date fails with invalid repo", {
+  testthat::skip_on_cran()
+  testthat::expect_error(
+    get_commit_date(
+      "nonexistent/repo",
+      "cd7a53f7c670bd5106a94b48573d5f824174170f"
+    ),
+    "Failed to get commit date for cd7a53f7c670bd5106a94b48573d5f824174170f"
+  )
+})
+
+testthat::test_that("get_commit_date fails with invalid commit", {
+  testthat::skip_on_cran()
+  testthat::expect_error(
+    get_commit_date(
+      "ropensci/rix",
+      "ad7a53f7c670bd5106a94b48573d5f824174170f"
+    ),
+    "Failed to get commit date for ad7a53f7c670bd5106a94b48573d5f824174170f"
+  )
+})
+
+testthat::test_that("get_commit_date tells you when no GitHub token is found, but still works", {
+  testthat::skip_on_cran()
+  # Temporarily unset GITHUB_PAT if it exists
+  old_pat <- Sys.getenv("GITHUB_PAT")
+  Sys.unsetenv("GITHUB_PAT")
+  on.exit(Sys.setenv(GITHUB_PAT = old_pat))
+
+  testthat::expect_message(
+    date <- get_commit_date(
+      "ropensci/rix",
+      "cd7a53f7c670bd5106a94b48573d5f824174170f"
+    ),
+    "No GitHub Personal Access Token found. Please set GITHUB_PAT in your environment. Falling back to unauthenticated API request."
+  )
+  testthat::expect_match(date, "2025-01-10T07:05:02Z")
+})
+
+testthat::test_that("Test download_all_commits works with valid repo", {
+  testthat::skip_on_cran()
+  commits <- download_all_commits("ropensci/rix", "2025-01-10T07:05:02Z")
+
+  # Check structure
+  testthat::expect_true(is.data.frame(commits))
+  testthat::expect_named(commits, c("sha", "date"))
+
+  # Check content
+  testthat::expect_true(nrow(commits) > 0)
+  testthat::expect_true(all(!is.na(commits$sha)))
+  testthat::expect_true(all(!is.na(commits$date)))
+
+  # Verify date format
+  testthat::expect_true(all(class(commits$date) %in% c("POSIXct", "POSIXt")))
+})
+
+testthat::test_that("Test download_all_commits fails with invalid repo", {
+  testthat::skip_on_cran()
+  testthat::expect_error(
+    download_all_commits("nonexistent/repo", "2025-01-10T07:05:02Z"),
+    "Failed to download commit data"
+  )
+})
+
+testthat::test_that("Test download_all_commits works without GitHub token", {
+  testthat::skip_on_cran()
+  # Temporarily unset GITHUB_PAT if it exists
+  old_pat <- Sys.getenv("GITHUB_PAT")
+  Sys.unsetenv("GITHUB_PAT")
+  on.exit(Sys.setenv(GITHUB_PAT = old_pat))
+
+  testthat::expect_message(
+    commits <- download_all_commits("ropensci/rix", "2025-01-10T07:05:02Z"),
+    "No GitHub Personal Access Token found. Please set GITHUB_PAT in your environment. Falling back to unauthenticated API request."
+  )
+
+  # Basic validation that we still got data
+  testthat::expect_true(is.data.frame(commits))
+  testthat::expect_true(nrow(commits) > 0)
+})
+
+testthat::test_that("resolve_package_commit works with different input cases", {
+  testthat::skip_on_cran()
+
+  # Test case 1: when a ref is provided, keep it as-is
+  pkg_with_ref <- c("schex", "031320d")
+  remotes <- c("welch-lab/liger", "SaskiaFreytag/schex@031320d")
+  target_date <- "2024-04-04T14:16:11Z"
+  testthat::expect_equal(
+    resolve_package_commit(remote_pkg_name_and_ref = pkg_with_ref, date = target_date, remotes = remotes),
+    "031320d"
+  )
+
+  # Test case 2: when no ref is provided, find the closest commit
+  pkg_without_ref <- c("liger")
+  remotes <- c("welch-lab/liger", "hms-dbmi/conos")
+  target_date <- "2024-04-04T14:16:11Z"
+  testthat::expect_equal(
+    resolve_package_commit(remote_pkg_name_and_ref = pkg_without_ref, date = target_date, remotes = remotes),
+    "43fccb96b986f9da2c3a4320fe58693ca660193b"
+  )
+
+  # Test case 3: invalid input (empty vector) raises an error
+  testthat::expect_error(
+    resolve_package_commit(c(), date = target_date, remotes = remotes),
+    "remote_pkg_name_and_ref must be a list of length 1 or 2"
+  )
+
+  # Test case 4: resolve_package_commit falls back to HEAD when the API fails
+  pkg_name <- c("nonexistent")
+  remotes <- c("user/nonexistent")
+  target_date <- "2024-04-04T14:16:11Z"
+
+  testthat::expect_message(
+    result <- resolve_package_commit(pkg_name, target_date, remotes),
+    "Failed to get commit for nonexistent: Failed to download commit data"
+  )
+  testthat::expect_equal(result, "HEAD")
+})
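+
+# NOTE: the tests above hit the live GitHub API; they are skipped on CRAN
+# and may be rate-limited when no GITHUB_PAT is set.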