Skip to content

Commit

Permalink
add r-package-devel with some gentle prodding
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Chirico committed Apr 11, 2020
1 parent 81631a3 commit 0b664df
Showing 1 changed file with 36 additions and 26 deletions.
62 changes: 36 additions & 26 deletions update_archives.R
Original file line number Diff line number Diff line change
@@ -1,34 +1,44 @@
#!/usr/local/bin/Rscript
library(rvest)

# NOTE(review): this listing looks like a rendered diff whose +/- markers were
# stripped, so pre-commit and post-commit lines are interleaved. The next line
# (a single hard-coded r-devel URL) appears to be the pre-commit version that
# URL_BASE and mailing_lists replace -- confirm against the repository.
URL = 'https://stat.ethz.ch/pipermail/r-devel/'
# Root of the pipermail archives; a list's name is appended to form its index URL.
URL_BASE ='https://stat.ethz.ch/pipermail'
# POSIXlt exposes $year (years since 1900) and $mon (0-based month), used below.
today = as.POSIXlt(Sys.time())
# One entry per mailing list, each a named character vector:
#   name    - the list's URL path component, also used as the local output directory
#   current - file name of the archive for the period that contains today
mailing_lists = list(
# r-devel: year plus full month name (e.g. 2020-April.txt); %B follows the
# session locale.
c(name = 'r-devel', current = format(today, '%Y-%B.txt')),
# r-package-devel: year plus quarter number 1-4 (e.g. 2020q2.txt).
c(name = 'r-package-devel',
current = sprintf('%dq%d.txt', today$year + 1900L, today$mon %/% 3L + 1L))
)
# For each mailing list: read its archive index page, work out which gzipped
# archives are not yet stored locally, then download and decompress them into
# a directory named after the list.
# NOTE(review): this listing looks like a rendered diff whose +/- markers were
# stripped; each step below appears twice in a row (presumably the pre-commit
# copy followed by the post-commit copy). Run literally, the second copy of
# each assignment overwrites the first and the download loop executes twice.
# Confirm against the repository before relying on this text as a script.
for (ii in seq_along(mailing_lists)) {
this_list = mailing_lists[[ii]]
# The trailing '' makes file.path() end the URL with '/', so hrefs scraped
# from the index page can be appended directly.
URL = file.path(URL_BASE, this_list['name'], '')

# NOTE(review): hard-coded directory, presumably the pre-commit line; it is
# reassigned from this_list['name'] two lines below.
outdir = 'r-devel'
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
outdir = this_list['name']
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)

# If current month exists in the archive, overwrite it (it shouldn't exist)
extant = setdiff(list.files(outdir), format(Sys.Date(), '%Y-%B.txt'))
# Always re-write current period
# (the current period's file is dropped from 'extant', so it is never treated
# as already downloaded)
extant = setdiff(list.files(outdir), this_list['current'])

zips = URL %>% read_html %>%
# linked under "Gzip'd Text NNN KB"
html_nodes(xpath = '//a[contains(text(), "Gzip")]') %>%
# only download new archives
html_attr('href') %>% setdiff(sprintf('%s.gz', extant)) %>%
# sort should be unnecessary
sprintf('%s%s', URL, . ) %>% sort
# Local files are stored without the '.gz' suffix (see the download loop), so
# '.gz' is appended to the local names before comparing against the hrefs.
zips = URL %>% read_html %>%
# linked under "Gzip'd Text NNN KB"
html_nodes(xpath = '//a[contains(text(), "Gzip")]') %>%
# only download new archives
html_attr('href') %>% setdiff(sprintf('%s.gz', extant)) %>%
# sort should be unnecessary
sprintf('%s%s', URL, . ) %>% sort

if (length(zips)) {
message('Acquiring ', length(zips), ' archives: ',
basename(head(zips, 1L)), ' - ', basename(tail(zips, 1L)))
} else {
message('No new zips to acquire')
}
# Report how many archives will be fetched, with the first and last file names
# of the sorted list, or say that nothing is new.
if (length(zips)) {
message('Acquiring ', length(zips), ' archives: ',
basename(head(zips, 1L)), ' - ', basename(tail(zips, 1L)))
} else {
message('No new zips to acquire')
}

for (zip in zips) {
download.file(zip, tmp <- tempfile())
conn <- gzfile(tmp)
outfile <- gsub('.gz', '', basename(zip), fixed = TRUE)
writeLines(readLines(conn), file.path(outdir, outfile))
close(conn)
unlink(tmp)
}
# Download each archive to a temporary file, read it through a gzfile()
# connection, and write the decompressed lines to <outdir>/<name without .gz>.
for (zip in zips) {
download.file(zip, tmp <- tempfile())
conn <- gzfile(tmp)
# fixed = TRUE removes every literal '.gz' in the name, not only a trailing one.
outfile <- gsub('.gz', '', basename(zip), fixed = TRUE)
writeLines(readLines(conn), file.path(outdir, outfile))
# Release the connection and delete the temporary download.
close(conn)
unlink(tmp)
}
}

0 comments on commit 0b664df

Please sign in to comment.