Skip to content

Commit

Permalink
switch to using https to scrape queued packages, bump version
Browse files Browse the repository at this point in the history
  • Loading branch information
fmichonneau committed May 5, 2021
1 parent 21aa8e0 commit 30d7c39
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 49 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: foghorn
Title: Summarize CRAN Check Results in the Terminal
Version: 1.3.2.9000
Version: 1.4.0
Authors@R:
c(person(given = "Francois",
family = "Michonneau",
Expand Down Expand Up @@ -38,6 +38,5 @@ VignetteBuilder:
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# foghorn

# v1.4.0

## Other Changes

* The inspection of the queue from CRAN incoming is now using HTTPS instead of
FTP.
* The data in the `size` column for the object returned by `cran_incoming()` is
now of type character.

# v1.3.2

## Bug fixes
Expand Down
86 changes: 45 additions & 41 deletions R/cran_queue.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ new_cran_q <- function(package = character(0),
version = as.package_version(character(0)),
cran_folder = character(0),
time = as.POSIXct(character(0)),
size = integer(0)) {
size = character(0)) {
stopifnot(is.character(package))
stopifnot(is.package_version(version))
stopifnot(is.character(cran_folder))
stopifnot(inherits(time, "POSIXct"))
stopifnot(is.integer(size))
stopifnot(is.character(size))

tibble::tibble(
package,
Expand All @@ -18,51 +18,52 @@ new_cran_q <- function(package = character(0),
)
}

parse_cran_incoming <- function(res) {
if (length(res$content) == 0) {
parse_http_cran_incoming <- function(res_raw) {
if (identical(length(res_raw$content), 0L)) {
return(new_cran_q())
}

rr <- read.table(
text = rawToChar(res$content),
stringsAsFactors = FALSE
)
res <- rawToChar(res_raw$content)
res <- xml2::read_html(res)
res <- rvest::html_table(res)

## people sometimes upload other stuff to the FTP server
rr <- rr[grepl("\\.tar.gz$", rr$V9), ]

pkgs <- parse_pkg(rr$V9)
## curl doesn't return the year for files that are less than
## 6 month old.
## see this thread: https://curl.se/mail/archive-2008-01/0007.html
##
## Assume that the files are from the current year,
## if the date is in the future, then go back one year
## (that's what is done with `future_times` below)
##
## FIXME: we don't know where the year will appear when it
## will be listed so files more than a year old will still
## have the wrong date!
year <- substr(Sys.time(), 1, 4)
## avoid need for locale-dependent month name match
## (CRAN FTP info uses English abbrevs)
month <- match(rr$V6, month.abb)
timestr <- with(rr, sprintf("%d-%s-%s %s", V7, month, year, V8))
time <- as.POSIXct(timestr, format = "%d-%m-%Y %H:%M", tz = "Europe/Vienna")

## fix dates in the future
future_times <- difftime(Sys.time(), time) < 0
time[future_times] <- as.POSIXct(
with(rr[future_times, ], sprintf("%d-%s-%s %s", V7, month[future_times], as.numeric(year) - 1L, V8)),
format = "%d-%m-%Y %H:%M", tz = "Europe/Vienna"
)
if (!identical(length(res), 1L)) {
stop(
"Issue with formatting of table. Please report the bug.",
call. = FALSE
)
}

res <- res[[1]]

res <- res[nzchar(res$Name) & res$Name != "Parent Directory", , drop = FALSE]

if (identical(nrow(res), 0L)) {
return(new_cran_q())
}

pkgs <- parse_pkg(res$Name)
time <- as.POSIXct(res[["Last modified"]], tz = "Europe/Vienna")
size <- res$Size

new_cran_q(
package = pkgs$package,
version = pkgs$version,
cran_folder = basename(res$url),
cran_folder = basename(res_raw$url),
time = time,
size = rr$V5
size = size
)
}

parse_pkg_version <- function(pkg) {
vapply(
pkg, function(x) {
if (is.na(x[2])) {
return("0.0.0")
}
x[2]
},
character(1)
)
}

Expand All @@ -71,7 +72,7 @@ parse_pkg <- function(pkg) {
pkg <- strsplit(pkg, "_")
tibble::tibble(
package = vapply(pkg, function(x) x[1], character(1)),
version = as.package_version(vapply(pkg, function(x) x[2], character(1)))
version = as.package_version(parse_pkg_version(pkg))
)
}

Expand Down Expand Up @@ -161,6 +162,9 @@ cran_ftp <- function(pkg, folders, url) {
##' \item{size}{the size of the package tarball}
##' }
##'
##' Note that if the package version is not provided, it will appear as `0.0.0`
##' in the `tibble`.
##'
##' @examples
##' \dontrun{
##' ## all the packages in the CRAN incoming queue
Expand All @@ -184,10 +188,10 @@ cran_incoming <- function(pkg = NULL,
res_data <- cran_ftp(
pkg = pkg,
folders = folders,
url = "ftp://cran.r-project.org/incoming/"
url = "https://cran.r-project.org/incoming/"
)

res <- lapply(res_data, function(x) parse_cran_incoming(x))
res <- lapply(res_data, function(x) parse_http_cran_incoming(x))
res <- do.call("rbind", res)

if (!is.null(pkg)) {
Expand Down
4 changes: 4 additions & 0 deletions R/foghorn.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
cran_url <- function(protocol = "https") {
protocol <- match.arg(protocol, c("https", "http", "ftp"))
if (identical(protocol, "ftp")) {
warning(
"The FTP protocol on the CRAN servers will soon be deprecated ",
"because they are insecure. Consider using HTTPS."
)
return("ftp://cran.r-project.org")
}

Expand Down
8 changes: 4 additions & 4 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@

### v1.3.2
### v1.4.0

## Test environments

- Ubuntu 16.04, R 3.6.3, R 4.0.3, R-devel (2020-12-15 r79641)
- macOS (macOS Catalina 10.15.7), R 4.0.3
- windows R 3.6.3, 4.0.3
- Ubuntu 20.04, R 3.6.3, R 4.0.5, R-devel (2021-05-04 r80263)
- macOS (macOS Catalina 10.15.7), R 4.0.5
- windows R 3.6.3, 4.0.5

## R CMD check results

Expand Down
12 changes: 12 additions & 0 deletions docs/articles/foghorn_files/header-attrs-2.7/header-attrs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
var i, h, a;
for (i = 0; i < hs.length; i++) {
h = hs[i];
if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
a = h.attributes;
while (a.length > 0) h.removeAttribute(a[0].name);
}
});
4 changes: 3 additions & 1 deletion tests/testthat/test-foghorn.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

test_that(
"invalid source name",
expect_error(cran_results(pkg = "dplyr", src = "foo"))
{
expect_error(cran_results(pkg = "dplyr", src = "foo"))
}
)


Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-incoming.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ test_that("Documentation is correct", {
version = c("package_version", "numeric_version"),
cran_folder = "character",
time = c("POSIXct", "POSIXt"),
size = "integer"
size = "character"
)
)
})
Expand Down

0 comments on commit 30d7c39

Please sign in to comment.