Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of non-region names in getNamedRegions and add related test #474

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: openxlsx
Type: Package
Title: Read, Write and Edit XLSX Files
Version: 4.1.1
Date: 2018-05-26
Version: 4.1.1.1003
Date: 2018-11-19
Authors@R: c(
person("Alexander", "Walker",
email = "[email protected]", role = c("aut", "cre")),
Expand Down Expand Up @@ -30,7 +30,7 @@ VignetteBuilder: knitr
Suggests:
knitr,
testthat
RoxygenNote: 6.0.1.9000
RoxygenNote: 6.1.1
Collate:
'CommentClass.R'
'HyperlinkClass.R'
Expand Down
15 changes: 10 additions & 5 deletions R/helperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -512,15 +512,20 @@ get_named_regions_from_string <- function(dn){
dn <- gsub("</workbook>", "", dn, fixed = TRUE)

dn <- unique(unlist(strsplit(dn, split = "</definedName>", fixed = TRUE)))
dn <- dn[grepl("<definedName", dn, fixed=TRUE)]

dn_names <- regmatches(dn, regexpr('(?<=name=")[^"]+', dn, perl = TRUE))

dn_pos <- regmatches(dn, regexpr("(?<=>).*", dn, perl = TRUE))
dn_coords <- regmatches(dn_pos, regexpr("(?<=!).*", dn_pos, perl = TRUE))
dn_coords <- gsub("$", "", dn_coords, fixed = TRUE)

dn_sheets <- regmatches(dn_pos, regexpr(".*(?=!)", dn_pos, perl = TRUE))
dn_sheets <- gsub("'", "", dn_sheets, fixed = TRUE)
dn_pos <- gsub("[$']", "", dn_pos)

has_bang <- grepl("!", dn_pos, fixed=TRUE)
dn_sheets <- ifelse(has_bang,
gsub("^(.*)!.*$", "\\1", dn_pos),
"")
dn_coords <- ifelse(has_bang,
gsub("^.*!(.*)$", "\\1", dn_pos),
"")

attr(dn_names, "sheet") <- dn_sheets
attr(dn_names, "position") <- dn_coords
Expand Down
11 changes: 9 additions & 2 deletions R/loadWorkbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,14 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE){

workbook <- readLines(workbookXML, warn=FALSE, encoding="UTF-8")
workbook <- removeHeadTag(workbook)
sheets <- unlist(regmatches(workbook, gregexpr("(?<=<sheets>).*(?=</sheets>)", workbook, perl = TRUE)))
sheets <- unlist(regmatches(sheets, gregexpr("<sheet[^>]*>", sheets, perl=TRUE)))

sheets <- unlist(regmatches(workbook, gregexpr("<sheet .*/sheets>", workbook, perl = TRUE)))
## Some veryHidden sheets do not have a sheet content and their rId is empty.
## Such sheets need to be filtered out because otherwise their sheet names
## occur in the list of all sheet names, leading to a wrong association
## of sheet names with sheet indeces.
sheets <- grep('r:id="[[:blank:]]*"', sheets, invert = TRUE, value = TRUE)

## sheetId is meaningless
## sheet rId links to the workbook.xml.resl which links worksheets/sheet(i).xml file
Expand All @@ -154,6 +160,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE){
sheetrId <- unlist(getRId(sheets))
sheetId <- unlist(regmatches(sheets, gregexpr('(?<=sheetId=")[0-9]+', sheets, perl = TRUE)))
sheetNames <- unlist(regmatches(sheets, gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE)))
sheetNames <- replaceXMLEntities(sheetNames)

is_chart_sheet <- sheetrId %in% chartSheetRIds
is_visible <- !grepl("hidden", unlist(strsplit(sheets, split = "<sheet "))[-1])
Expand Down Expand Up @@ -765,7 +772,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE){
hasDrawing <- sapply(drawXMLrelationship, length) > 0 ## which sheets have a drawing

commentXMLrelationship <- lapply(xml, function(x) x[grepl("comments[0-9]+\\.xml", x)])
hasComment <- sapply(drawXMLrelationship, length) > 0 ## which sheets have a drawing
hasComment <- sapply(commentXMLrelationship, length) > 0 ## which sheets have a comment

for(i in 1:length(xml)){

Expand Down
14 changes: 11 additions & 3 deletions R/readWorkbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,20 @@ read.xlsx.default <- function(xlsxFile,

workbook <- unlist(readLines(workbook, warn = FALSE, encoding = "UTF-8"))
workbook <- removeHeadTag(workbook)
sheets <- unlist(regmatches(workbook, gregexpr("<sheet .*/sheets>", workbook, perl = TRUE)))

sheets <- unlist(regmatches(workbook, gregexpr("(?<=<sheets>).*(?=</sheets>)", workbook, perl = TRUE)))
sheets <- unlist(regmatches(sheets, gregexpr("<sheet[^>]*>", sheets, perl=TRUE)))

## Some veryHidden sheets do not have a sheet content and their rId is empty.
## Such sheets need to be filtered out because otherwise their sheet names
## occur in the list of all sheet names, leading to a wrong association
## of sheet names with sheet indeces.
sheets <- grep('r:id="[[:blank:]]*"', sheets, invert = TRUE, value = TRUE)

## make sure sheetId is 1 based
sheetrId <- unlist(getRId(sheets))
sheetNames <- unlist(regmatches(sheets, gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE)))

sheetNames <- replaceXMLEntities(sheetNames)

nSheets <- length(sheetrId)
if(nSheets == 0)
stop("Workbook has no worksheets")
Expand Down
10 changes: 9 additions & 1 deletion R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -3061,7 +3061,15 @@ getSheetNames <- function(file){
workbook <- xmlFiles[grepl("workbook.xml$", xmlFiles, perl = TRUE)]
workbook <- readLines(workbook, warn=FALSE, encoding="UTF-8")
workbook <- removeHeadTag(workbook)
sheets <- unlist(regmatches(workbook, gregexpr("<sheet .*/sheets>", workbook, perl = TRUE)))
sheets <- unlist(regmatches(workbook, gregexpr("(?<=<sheets>).*(?=</sheets>)", workbook, perl = TRUE)))
sheets <- unlist(regmatches(sheets, gregexpr("<sheet[^>]*>", sheets, perl=TRUE)))

## Some veryHidden sheets do not have a sheet content and their rId is empty.
## Such sheets need to be filtered out because otherwise their sheet names
## occur in the list of all sheet names, leading to a wrong association
## of sheet names with sheet indeces.
sheets <- grep('r:id="[[:blank:]]*"', sheets, invert = TRUE, value = TRUE)

sheetNames <- unlist(regmatches(sheets, gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE)))
sheetNames <- replaceXMLEntities(sheetNames)

Expand Down
Binary file added inst/namedRegions2.xlsx
Binary file not shown.
3 changes: 2 additions & 1 deletion man/addStyle.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions man/addWorksheet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/createWorkbook.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/insertPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/makeHyperlinkString.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/pageSetup.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/read.xlsx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/readWorkbook.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions tests/testthat/test-named_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,25 @@ test_that("Correctly Loading Named Regions Created in Excel",{
})


test_that("Load names from an Excel file with funky non-region names", {
filename <- system.file("namedRegions2.xlsx", package = "openxlsx")
wb <- loadWorkbook(filename)
names <- getNamedRegions(wb)
sheets <- attr(names, "sheet")
positions <- attr(names, "position")

expect_true(length(names) == length(sheets))
expect_true(length(names) == length(positions))
expect_equal(head(names, 5),
c("barref", "barref", "fooref", "fooref", "IQ_CH"))
expect_equal(sheets,
c("Sheet with space", "Sheet1", "Sheet with space", "Sheet1",
rep("", 26)))
expect_equal(positions, c("B4", "B4", "B3", "B3", rep("", 26)))

names2 <- getNamedRegions(filename)
expect_equal(names, names2)
})


test_that("Missing rows in named regions", {
Expand Down