#!/usr/local/bin/Rscript
# Mirror the gzip'd text archives linked from each mailing list's pipermail
# index page into a local directory named after the list. Archives already
# present locally are skipped, except the file for the current period, which
# is always downloaded again.
library(rvest)

URL_BASE <- 'https://stat.ethz.ch/pipermail'
today <- as.POSIXlt(Sys.time())
# POSIXlt stores the year as an offset from 1900 and the month as 0-11.
this_year <- today$year + 1900L

# One entry per mailing list:
#   name    - list name; used both as the URL path component and as the local
#             output directory
#   current - file name of the archive covering the current period
mailing_lists <- list(
  # `month.name` is used instead of format(today, '%B') because '%B' follows
  # the session locale; assumes the archive uses English month names --
  # TODO confirm against the index page.
  c(name = 'r-devel',
    current = sprintf('%d-%s.txt', this_year, month.name[today$mon + 1L])),
  # Quarter number (1-4) derived from the zero-based month.
  c(name = 'r-package-devel',
    current = sprintf('%dq%d.txt', this_year, today$mon %/% 3L + 1L))
)

# Download one gzip'd archive and write its decompressed lines to
# file.path(outdir, <archive name without the trailing '.gz'>).
#
# zip:    URL of the .gz archive
# outdir: existing directory that receives the decompressed text file
#
# Returns the output path invisibly. The temporary download and the gzfile
# connection are released even when the download or the read fails.
fetch_archive <- function(zip, outdir) {
  tmp <- tempfile()
  conn <- NULL
  on.exit({
    if (!is.null(conn)) close(conn)
    unlink(tmp)
  }, add = TRUE)

  # 'wb' keeps the compressed payload intact on platforms where the default
  # text mode would translate line endings.
  status <- download.file(zip, tmp, mode = 'wb')
  if (status != 0L) {
    stop('Download failed for ', zip, call. = FALSE)
  }

  conn <- gzfile(tmp)
  # Strip only the trailing '.gz' extension, not every '.gz' in the name.
  outfile <- file.path(outdir, sub('\\.gz$', '', basename(zip)))
  writeLines(readLines(conn), outfile)
  invisible(outfile)
}

for (ii in seq_along(mailing_lists)) {
  this_list <- mailing_lists[[ii]]
  # `[[` drops the element name so plain strings flow into paths and URLs.
  outdir <- this_list[['name']]
  URL <- sprintf('%s/%s/', URL_BASE, outdir)

  dir.create(outdir, recursive = TRUE, showWarnings = FALSE)

  # Always re-write current period: leaving it out of `extant` means it is
  # never treated as already downloaded.
  extant <- setdiff(list.files(outdir), this_list[['current']])

  zips <- URL %>%
    read_html() %>%
    # linked under "Gzip'd Text NNN KB"
    html_nodes(xpath = '//a[contains(text(), "Gzip")]') %>%
    html_attr('href') %>%
    # only download new archives
    setdiff(sprintf('%s.gz', extant)) %>%
    sprintf('%s%s', URL, .) %>%
    # sort should be unnecessary
    sort()

  if (length(zips) > 0L) {
    message('Acquiring ', length(zips), ' archives: ',
            basename(head(zips, 1L)), ' - ', basename(tail(zips, 1L)))
  } else {
    message('No new zips to acquire')
  }

  for (zip in zips) {
    fetch_archive(zip, outdir)
  }
}