awalker89 · bescoto · Nov 19, 2018 · Nov 19, 2018 · Nov 19, 2018 · Nov 19, 2018
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: openxlsx
 Type: Package
 Title: Read, Write and Edit XLSX Files
-Version: 4.1.1
-Date: 2018-05-26
+Version: 4.1.1.1003
+Date: 2018-11-19
 Authors@R: c(
     person("Alexander", "Walker",
     email = "[email protected]", role = c("aut", "cre")),
@@ -30,7 +30,7 @@ VignetteBuilder: knitr
 Suggests:
     knitr,
     testthat
-RoxygenNote: 6.0.1.9000
+RoxygenNote: 6.1.1
 Collate: 
     'CommentClass.R'
     'HyperlinkClass.R'

diff --git a/R/helperFunctions.R b/R/helperFunctions.R
@@ -512,15 +512,20 @@ get_named_regions_from_string <- function(dn){
   dn <- gsub("</workbook>", "", dn, fixed = TRUE)
 
   dn <- unique(unlist(strsplit(dn, split = "</definedName>", fixed = TRUE)))
+  dn <- dn[grepl("<definedName", dn, fixed=TRUE)]
 
   dn_names <- regmatches(dn, regexpr('(?<=name=")[^"]+', dn, perl = TRUE))
 
   dn_pos <- regmatches(dn, regexpr("(?<=>).*", dn, perl = TRUE))
-  dn_coords <- regmatches(dn_pos, regexpr("(?<=!).*", dn_pos, perl = TRUE))
-  dn_coords <- gsub("$", "", dn_coords, fixed = TRUE)
-
-  dn_sheets <- regmatches(dn_pos, regexpr(".*(?=!)", dn_pos, perl = TRUE))
-  dn_sheets <- gsub("'", "", dn_sheets, fixed = TRUE)
+  dn_pos <- gsub("[$']", "", dn_pos)
+
+  has_bang <- grepl("!", dn_pos, fixed=TRUE)
+  dn_sheets <- ifelse(has_bang,
+                      gsub("^(.*)!.*$", "\\1", dn_pos),
+                      "")
+  dn_coords <- ifelse(has_bang,
+                      gsub("^.*!(.*)$", "\\1", dn_pos),
+                      "")
 
   attr(dn_names, "sheet") <- dn_sheets
   attr(dn_names, "position") <- dn_coords

diff --git a/R/loadWorkbook.R b/R/loadWorkbook.R
@@ -144,8 +144,14 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE){
 
     workbook <- readLines(workbookXML, warn=FALSE, encoding="UTF-8")
     workbook <-  removeHeadTag(workbook)
+    sheets <- unlist(regmatches(workbook, gregexpr("(?<=<sheets>).*(?=</sheets>)", workbook, perl = TRUE)))
+    sheets <- unlist(regmatches(sheets, gregexpr("<sheet[^>]*>", sheets, perl=TRUE)))
 
-    sheets <- unlist(regmatches(workbook, gregexpr("<sheet .*/sheets>", workbook, perl = TRUE)))
+    ## Some veryHidden sheets have no sheet content and an empty r:id.
+    ## Such sheets must be filtered out; otherwise their names would appear
+    ## in the list of all sheet names and break the association between
+    ## sheet names and sheet indices.
+    sheets <- grep('r:id="[[:blank:]]*"', sheets, invert = TRUE, value = TRUE)
 
     ## sheetId is meaningless
     ## sheet rId links to the workbook.xml.resl which links worksheets/sheet(i).xml file
@@ -154,6 +160,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE){
     sheetrId <- unlist(getRId(sheets))
     sheetId <- unlist(regmatches(sheets, gregexpr('(?<=sheetId=")[0-9]+', sheets, perl = TRUE)))
     sheetNames <- unlist(regmatches(sheets, gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE)))
+    sheetNames <- replaceXMLEntities(sheetNames)
 
     is_chart_sheet <- sheetrId %in% chartSheetRIds
     is_visible <- !grepl("hidden",  unlist(strsplit(sheets, split = "<sheet "))[-1])
@@ -765,7 +772,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE){
       hasDrawing <- sapply(drawXMLrelationship, length) > 0 ## which sheets have a drawing
 
       commentXMLrelationship <- lapply(xml, function(x) x[grepl("comments[0-9]+\\.xml", x)])
-      hasComment <- sapply(drawXMLrelationship, length) > 0 ## which sheets have a drawing
+      hasComment <- sapply(commentXMLrelationship, length) > 0 ## which sheets have a comment
 
       for(i in 1:length(xml)){
 

diff --git a/R/readWorkbook.R b/R/readWorkbook.R
@@ -153,12 +153,20 @@ read.xlsx.default <- function(xlsxFile,
 
   workbook <- unlist(readLines(workbook, warn = FALSE, encoding = "UTF-8"))
   workbook <- removeHeadTag(workbook)
-  sheets <- unlist(regmatches(workbook, gregexpr("<sheet .*/sheets>", workbook, perl = TRUE)))
-
+  sheets <- unlist(regmatches(workbook, gregexpr("(?<=<sheets>).*(?=</sheets>)", workbook, perl = TRUE)))
+  sheets <- unlist(regmatches(sheets, gregexpr("<sheet[^>]*>", sheets, perl=TRUE)))
+
+  ## Some veryHidden sheets have no sheet content and an empty r:id.
+  ## Such sheets must be filtered out; otherwise their names would appear
+  ## in the list of all sheet names and break the association between
+  ## sheet names and sheet indices.
+  sheets <- grep('r:id="[[:blank:]]*"', sheets, invert = TRUE, value = TRUE)
+
   ## make sure sheetId is 1 based
   sheetrId <- unlist(getRId(sheets))
   sheetNames <- unlist(regmatches(sheets, gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE)))
-
+  sheetNames <- replaceXMLEntities(sheetNames)
+
   nSheets <- length(sheetrId)
   if(nSheets == 0)
     stop("Workbook has no worksheets")

diff --git a/R/wrappers.R b/R/wrappers.R
@@ -3061,7 +3061,15 @@ getSheetNames <- function(file){
   workbook <- xmlFiles[grepl("workbook.xml$", xmlFiles, perl = TRUE)]
   workbook <- readLines(workbook, warn=FALSE, encoding="UTF-8")
   workbook <-  removeHeadTag(workbook)
-  sheets <- unlist(regmatches(workbook, gregexpr("<sheet .*/sheets>", workbook, perl = TRUE)))
+  sheets <- unlist(regmatches(workbook, gregexpr("(?<=<sheets>).*(?=</sheets>)", workbook, perl = TRUE)))
+  sheets <- unlist(regmatches(sheets, gregexpr("<sheet[^>]*>", sheets, perl=TRUE)))
+
+  ## Some veryHidden sheets have no sheet content and an empty r:id.
+  ## Such sheets must be filtered out; otherwise their names would appear
+  ## in the list of all sheet names and break the association between
+  ## sheet names and sheet indices.
+  sheets <- grep('r:id="[[:blank:]]*"', sheets, invert = TRUE, value = TRUE)
+
   sheetNames <- unlist(regmatches(sheets, gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE)))
   sheetNames <- replaceXMLEntities(sheetNames)
 

diff --git a/inst/namedRegions2.xlsx b/inst/namedRegions2.xlsx
diff --git a/man/addStyle.Rd b/man/addStyle.Rd
diff --git a/man/addWorksheet.Rd b/man/addWorksheet.Rd
diff --git a/man/createWorkbook.Rd b/man/createWorkbook.Rd
diff --git a/man/insertPlot.Rd b/man/insertPlot.Rd
diff --git a/man/makeHyperlinkString.Rd b/man/makeHyperlinkString.Rd
diff --git a/man/pageSetup.Rd b/man/pageSetup.Rd
diff --git a/man/read.xlsx.Rd b/man/read.xlsx.Rd
diff --git a/man/readWorkbook.Rd b/man/readWorkbook.Rd
diff --git a/tests/testthat/test-named_regions.R b/tests/testthat/test-named_regions.R
@@ -118,6 +118,25 @@ test_that("Correctly Loading Named Regions Created in Excel",{
 })
 
 
+test_that("Load names from an Excel file with funky non-region names", {
+  filename <- system.file("namedRegions2.xlsx", package = "openxlsx")
+  wb <- loadWorkbook(filename)
+  regions <- getNamedRegions(wb)
+  sheets <- attr(regions, "sheet")
+  positions <- attr(regions, "position")
+  # Each defined name needs exactly one sheet and one position entry; names
+  # that do not refer to a sheet!cell range are expected to carry "" for both.
+  expect_length(sheets, length(regions))
+  expect_length(positions, length(regions))
+  expect_equal(head(regions, 5),
+               c("barref", "barref", "fooref", "fooref", "IQ_CH"))
+  expect_equal(sheets,
+               c("Sheet with space", "Sheet1", "Sheet with space", "Sheet1",
+                 rep("", 26)))
+  expect_equal(positions, c("B4", "B4", "B3", "B3", rep("", 26)))
+  # Reading from the file path must agree with reading the loaded workbook.
+  expect_equal(regions, getNamedRegions(filename))
+})
 
 
 test_that("Missing rows in named regions", {