Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use sync pools for shared string parser #828

Merged
merged 1 commit into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ func autoFilterDefinedName(sheet *Sheet, sheetIndex int) (*xlsxDefinedName, erro
// representing the file in terms of the structure of an XLSX file.
func (f *File) MakeStreamParts() (map[string]string, error) {
var parts map[string]string
var refTable *RefTable = NewSharedStringRefTable(10000) // 10000 is arbitrary
var refTable *RefTable = NewSharedStringRefTable(DEFAULT_REFTABLE_SIZE)
refTable.isWrite = true
var workbookRels WorkBookRels = make(WorkBookRels)
var err error
Expand Down Expand Up @@ -465,7 +465,7 @@ func (f *File) MakeStreamParts() (map[string]string, error) {
// MarshallParts constructs a map of file name to XML content representing the file
// in terms of the structure of an XLSX file.
func (f *File) MarshallParts(zipWriter *zip.Writer) error {
var refTable *RefTable = NewSharedStringRefTable(10000) // 10000 is arbitrary
var refTable *RefTable = NewSharedStringRefTable(DEFAULT_REFTABLE_SIZE)
refTable.isWrite = true
var workbookRels WorkBookRels = make(WorkBookRels)
var err error
Expand Down Expand Up @@ -650,9 +650,10 @@ func (f *File) MarshallParts(zipWriter *zip.Writer) error {
// Here, value would be set to the raw value of the cell A1 in the
// first sheet in the XLSX file.
func (f *File) ToSlice() (output [][][]string, err error) {
output = [][][]string{}
sheetCount := len(f.Sheets)
output = make([][][]string, 0, sheetCount)
for _, sheet := range f.Sheets {
s := [][]string{}
s := make([][]string, 0, sheet.MaxRow)
err := sheet.ForEachRow(func(row *Row) error {
r := []string{}
err := row.ForEachCell(func(cell *Cell) error {
Expand Down
126 changes: 114 additions & 12 deletions lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"runtime/debug"
"strconv"
"strings"
"sync"
)

const (
Expand All @@ -22,6 +23,26 @@ const (
externalSheetBangChar = "!"
)

// Package-level sync.Pool instances. Each pool's New constructor hands
// back a pointer to a freshly zeroed value of the type named below.
var (
	// tokPool's New yields an empty *xml.StartElement.
	tokPool = sync.Pool{
		New: func() interface{} { return new(xml.StartElement) },
	}

	// xlsxSIPool's New yields an empty *xlsxSI.
	xlsxSIPool = sync.Pool{
		New: func() interface{} { return new(xlsxSI) },
	}

	// xmlAttrPool's New yields an empty *xml.Attr.
	xmlAttrPool = sync.Pool{
		New: func() interface{} { return new(xml.Attr) },
	}
)

// XLSXReaderError is the standard error type for otherwise undefined
// errors in the XLSX reading process.
type XLSXReaderError struct {
Expand Down Expand Up @@ -845,15 +866,104 @@ func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]strin
return sheetsByName, sheets, err
}

// readSharedStrings streams a shared-strings XML document from rc and
// builds a RefTable from it.
//
// The document is walked token by token. The <sst> start element creates
// the table, sized from its "count" attribute when one is present and
// non-negative, and from DEFAULT_REFTABLE_SIZE otherwise. Every <si>
// element is decoded into a pooled xlsxSI and appended to the table as
// rich text when it carries runs, or as a plain string when it does not.
//
// A wrapped error is returned when the XML cannot be tokenised, when
// "count" is not an integer, when an <si> is seen before <sst>, when an
// <si> fails to decode, or when the input contains no <sst> at all.
func readSharedStrings(rc io.Reader) (*RefTable, error) {
	wrap := func(err error) (*RefTable, error) {
		return nil, fmt.Errorf("readSharedStrings: %w", err)
	}

	var reftable *RefTable
	decoder := xml.NewDecoder(rc)

	for {
		tok, err := decoder.Token()
		// The error must be inspected before the token: Token returns a
		// nil token for every failure, not only at end of input, so
		// testing the token first would silently drop syntax errors.
		if err == io.EOF {
			break
		}
		if err != nil {
			return wrap(err)
		}
		if tok == nil {
			break
		}

		start, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		switch start.Name.Local {
		case "sst":
			// count is only used as the initial size handed to
			// NewSharedStringRefTable, so a missing or negative value
			// simply falls back to the default size.
			size := DEFAULT_REFTABLE_SIZE
			for i := range start.Attr {
				if start.Attr[i].Name.Local != "count" {
					continue
				}
				count, err := strconv.Atoi(start.Attr[i].Value)
				if err != nil {
					return wrap(err)
				}
				if count >= 0 {
					size = count
				}
				break
			}
			reftable = NewSharedStringRefTable(size)
			reftable.isWrite = false // TODO: do we actually use this?
		case "si":
			if reftable == nil {
				return wrap(fmt.Errorf("si encountered before reftable created"))
			}
			si := xlsxSIPool.Get().(*xlsxSI)
			err = decoder.DecodeElement(si, &start)
			if err == nil {
				if len(si.R) > 0 {
					reftable.AddRichText(xmlToRichText(si.R))
				} else {
					reftable.AddString(si.T.getText())
				}
			}
			// Always hand a zeroed value back to the pool, including on
			// the error path; a partially decoded xlsxSI would otherwise
			// leak stale fields into the next element decoded into it.
			*si = xlsxSI{}
			xlsxSIPool.Put(si)
			if err != nil {
				return wrap(err)
			}
		default:
			// Other elements are not relevant here.
		}
	}

	if reftable == nil {
		return wrap(fmt.Errorf("no sst element found"))
	}
	return reftable, nil
}

// readSharedStringsFromZipFile() is an internal helper function to
// extract a reference table from the sharedStrings.xml file within
// the XLSX zip file.
func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
var sst *xlsxSST
var err error
var rc io.ReadCloser
var decoder *xml.Decoder
var reftable *RefTable

wrap := func(err error) (*RefTable, error) {
return nil, fmt.Errorf("readSharedStringsFromZipFile: %w", err)
Expand All @@ -870,15 +980,7 @@ func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
return wrap(err)
}
defer rc.Close()

sst = new(xlsxSST)
decoder = xml.NewDecoder(rc)
err = decoder.Decode(sst)
if err != nil {
return wrap(err)
}
reftable = MakeSharedStringRefTable(sst)
return reftable, nil
return readSharedStrings(rc)
}

// readStylesFromZipFile() is an internal helper function to
Expand Down
76 changes: 29 additions & 47 deletions lib_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ func TestLib(t *testing.T) {
// })

csRunC(c, "ReadRowsFromSheet", func(c *qt.C, constructor CellStoreConstructor) {
var err error
var sharedstringsXML = bytes.NewBufferString(`
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="4" uniqueCount="4">
Expand Down Expand Up @@ -337,14 +338,12 @@ func TestLib(t *testing.T) {
footer="0.3"/>
</worksheet>`)
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
err = xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheet("test")
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -433,12 +432,10 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
Expand Down Expand Up @@ -486,13 +483,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -568,13 +563,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -717,12 +710,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -764,13 +756,10 @@ func TestLib(t *testing.T) {
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)

sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -882,12 +871,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)

Expand Down Expand Up @@ -964,12 +952,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -1043,12 +1030,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -1334,13 +1320,10 @@ func TestLib(t *testing.T) {
err := xml.NewDecoder(sheetXML).Decode(worksheet)
c.Assert(err, qt.IsNil)

sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
Expand Down Expand Up @@ -1433,12 +1416,11 @@ func TestReadRowsFromSheet(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

worksheet.mapMergeCells()
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
Expand Down
Loading
Loading