-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_scrape.R
31 lines (23 loc) · 1.27 KB
/
text_scrape.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
library(tidyverse)
# Input R Markdown file name and output CSV file name; presumably the
# arguments meant to be passed to text_scrape() -- confirm against the caller.
rmd <- "SOE-MAFMC-2020.Rmd"
output <- "MA-alt-text-prep.csv"
#' Scrape chunk names and figure captions from an R Markdown file
#'
#' Reads the file line by line, extracts the chunk name (the text between
#' `{r ` and the first comma) and the `fig.cap` value from each line with
#' regular expressions, keeps the lines where both were found and the caption
#' is non-empty, and writes the result to a CSV file.
#'
#' @param rmd Path to the R Markdown file; resolved with `here::here()`.
#' @param output Path of the CSV file to write; passed to `write.csv()` as-is.
#' @return The scraped data frame (columns `r.title` and `fig.cap`),
#'   invisibly. Called mainly for the side effect of writing `output`.
text_scrape <- function(rmd, output) {
  lines <- readLines(here::here(rmd)) # one element per line of the Rmd

  ## Grab code chunk names (NA for lines without a "{r name," header)
  r.title <- stringr::str_extract(lines, pattern = "\\{\\s*r (.*?),")
  r.title <- stringr::str_remove(r.title, pattern = "\\{\\s*r") # drop "{r"
  r.title <- stringr::str_remove(r.title, pattern = ",") # drop the comma

  ## Grab fig captions: everything from "fig.cap=" up to the first "}"
  fig.cap <- stringr::str_extract(
    lines,
    pattern = "fig.cap\\s*=(.*?)\\s*\\}"
  )
  fig.cap <- stringr::str_remove(fig.cap, pattern = "fig.cap\\s*=")
  fig.cap <- stringr::str_remove(fig.cap, pattern = "paste0\\(\\s*")

  # str_split_fixed() returns a character matrix with `n` columns. Only the
  # first column (the text before the delimiter) is the caption. Passing the
  # whole matrix on would flatten it into a vector four times as long as
  # `lines`, and cbind() below would then recycle `r.title` and emit the
  # discarded remainders (e.g. " = FALSE") as extra caption rows.
  fig.cap <- stringr::str_split_fixed(
    fig.cap,
    pattern = "\\,\\s*fig.*", # cut at the next fig.xx option
    n = 2
  )[, 1]
  fig.cap <- stringr::str_split_fixed(
    fig.cap,
    pattern = "\\s*message*", # cut at the message option
    n = 2
  )[, 1]
  fig.cap <- stringr::str_remove(fig.cap, pattern = "\\s*\\}") # drop last }

  ## Combine grabs into a data frame
  df1 <- cbind(r.title, fig.cap)
  text.scrape <- dplyr::filter(
    as.data.frame(na.omit(df1)), # drop lines missing a name or a caption
    fig.cap != "" # drop lines with an empty caption
  )

  ## Build csv
  write.csv(text.scrape, file = output)
  invisible(text.scrape)
}