From e749444a3fcca6af50be845e492249003217d38f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Soma=20R=C3=A1d=C3=B3czi?= Date: Mon, 6 Dec 2021 10:57:15 +0100 Subject: [PATCH] colLimit --- file.go | 11 +++++++++++ lib.go | 29 ++++++++++++++++++++--------- lib_test.go | 26 +++++++++++++------------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/file.go b/file.go index 3ad3c5fb..f8e3593f 100644 --- a/file.go +++ b/file.go @@ -27,10 +27,12 @@ type File struct { DefinedNames []*xlsxDefinedName cellStoreConstructor CellStoreConstructor rowLimit int + colLimit int valueOnly bool } const NoRowLimit int = -1 +const NoColLimit int = -1 type FileOption func(f *File) @@ -42,6 +44,14 @@ func RowLimit(n int) FileOption { } } +// ColLimit will limit the columns handled in any given sheet to the +// first n, where n is the number of columns +func ColLimit(n int) FileOption { + return func(f *File) { + f.colLimit = n + } +} + // ValueOnly treats all NULL values as meaningless and it will delete all NULL value cells, // before decode worksheet.xml. this option can save memory and time when parsing files // with a large number of NULL values. But it may also cause accidental injury, @@ -60,6 +70,7 @@ func NewFile(options ...FileOption) *File { Sheets: make([]*Sheet, 0), DefinedNames: make([]*xlsxDefinedName, 0), rowLimit: NoRowLimit, + colLimit: NoColLimit, cellStoreConstructor: NewMemoryCellStore, } for _, opt := range options { diff --git a/lib.go b/lib.go index 6a36e434..c093fac2 100644 --- a/lib.go +++ b/lib.go @@ -182,7 +182,7 @@ func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err erro // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet // that doesn't have a DimensionRef set. The only case currently // known where this is true is with XLSX exported from Google Docs. -func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, maxy int, err error) { +func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet, colLimit int) (minx, miny, maxx, maxy int, err error) { // Note, this method could be very slow for large spreadsheets. var x, y int var maxVal int @@ -202,6 +202,12 @@ func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet) (minx, miny, maxx, m if err != nil { return wrap(err) } + + // break out of the loop if a column limit is set + if colLimit != NoColLimit && x+1 > colLimit { + break + } + if x < minx { minx = x } @@ -457,7 +463,7 @@ func fillCellDataFromInlineString(rawcell xlsxC, cell *Cell) { // rows from a XSLXWorksheet, populates them with Cells and resolves // the value references from the reference table and stores them in // the rows and columns. -func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLimit int, linkTable hyperlinkTable) error { +func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLimit, colLimit int, linkTable hyperlinkTable) error { var row *Row var maxCol, maxRow, colCount, rowCount int var reftable *RefTable @@ -474,10 +480,10 @@ func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLi return nil } reftable = file.referenceTable - if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, cellRangeChar)) == 2 && rowLimit == NoRowLimit { + if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, cellRangeChar)) == 2 && rowLimit == NoRowLimit && colLimit == NoColLimit { _, _, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref) } else { - _, _, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet) + _, _, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet, colLimit) } if err != nil { return wrap(err) @@ -545,6 +551,11 @@ func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLi return wrap(err) } + // break out of the loop if column limit is set + if colLimit != NoColLimit && colLimit < x+1 { + break + } + cellX := x cell := newCell(row, cellX) @@ -687,7 +698,7 @@ func makeHyperlinkTable(worksheet *xlsxWorksheet, fi *File, rsheet *xlsxSheet) ( // into a Sheet struct. This work can be done in parallel and so // readSheetsFromZipFile will spawn an instance of this function per // sheet and get the results back on the provided channel. -func readSheetFromFile(rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string, rowLimit int, valueOnly bool) (sheet *Sheet, errRes error) { +func readSheetFromFile(rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string, rowLimit, colLimit int, valueOnly bool) (sheet *Sheet, errRes error) { defer func() { if x := recover(); x != nil { errRes = errors.New(fmt.Sprintf("%v\n%s\n", x, debug.Stack())) @@ -714,7 +725,7 @@ func readSheetFromFile(rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string } sheet.File = fi - err = readRowsFromSheet(worksheet, fi, sheet, rowLimit, linkTable) + err = readRowsFromSheet(worksheet, fi, sheet, rowLimit, colLimit, linkTable) if err != nil { return wrap(err) } @@ -743,7 +754,7 @@ func readSheetFromFile(rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string // readSheetsFromZipFile is an internal helper function that loops // over the Worksheets defined in the XSLXWorkbook and loads them into // Sheet objects stored in the Sheets slice of a xlsx.File struct. -func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string, rowLimit int, valueOnly bool) (map[string]*Sheet, []*Sheet, error) { +func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string, rowLimit, colLimit int, valueOnly bool) (map[string]*Sheet, []*Sheet, error) { var workbook *xlsxWorkbook var err error var rc io.ReadCloser @@ -787,7 +798,7 @@ func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]strin i, rawsheet := i, rawsheet go func() { sheet, err := readSheetFromFile(rawsheet, file, - sheetXMLMap, rowLimit, valueOnly) + sheetXMLMap, rowLimit, colLimit, valueOnly) sheetChan <- &indexedSheet{ Index: i, Sheet: sheet, @@ -1073,7 +1084,7 @@ func ReadZipReader(r *zip.Reader, options ...FileOption) (*File, error) { file.styles = style } - sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap, file.rowLimit, file.valueOnly) + sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap, file.rowLimit, file.colLimit, file.valueOnly) if err != nil { return wrap(err) } diff --git a/lib_test.go b/lib_test.go index 298ac16b..4975c148 100644 --- a/lib_test.go +++ b/lib_test.go @@ -237,7 +237,7 @@ func TestLib(t *testing.T) { worksheet := new(xlsxWorksheet) err := xml.NewDecoder(sheetxml).Decode(worksheet) c.Assert(err, qt.IsNil) - minx, miny, maxx, maxy, err := calculateMaxMinFromWorksheet(worksheet) + minx, miny, maxx, maxy, err := calculateMaxMinFromWorksheet(worksheet, NoColLimit) c.Assert(err, qt.IsNil) c.Assert(minx, qt.Equals, 0) c.Assert(miny, qt.Equals, 0) @@ -346,7 +346,7 @@ func TestLib(t *testing.T) { sheet, err := NewSheet("test") c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxRow, qt.Equals, 2) c.Assert(sheet.MaxCol, qt.Equals, 2) @@ -445,7 +445,7 @@ func TestLib(t *testing.T) { // Discarding all return values; this test is a regression for // a panic due to an "index out of range." - readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) }) csRunC(c, "ReadRowsFromSheetWithLeadingEmptyRows", func(c *qt.C, constructor CellStoreConstructor) { @@ -495,7 +495,7 @@ func TestLib(t *testing.T) { c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxRow, qt.Equals, 5) c.Assert(sheet.MaxCol, qt.Equals, 1) @@ -577,7 +577,7 @@ func TestLib(t *testing.T) { c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxRow, qt.Equals, 2) c.Assert(sheet.MaxCol, qt.Equals, 4) @@ -725,7 +725,7 @@ func TestLib(t *testing.T) { c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxRow, qt.Equals, 3) c.Assert(sheet.MaxCol, qt.Equals, 3) @@ -773,7 +773,7 @@ func TestLib(t *testing.T) { c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxCol, qt.Equals, 4) c.Assert(sheet.MaxRow, qt.Equals, 8) @@ -891,7 +891,7 @@ func TestLib(t *testing.T) { lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxRow, qt.Equals, 2) c.Assert(sheet.MaxCol, qt.Equals, 4) @@ -972,7 +972,7 @@ func TestLib(t *testing.T) { c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxRow, qt.Equals, 1) c.Assert(sheet.MaxCol, qt.Equals, 6) @@ -1051,7 +1051,7 @@ func TestLib(t *testing.T) { c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxRow, qt.Equals, 1) c.Assert(sheet.MaxCol, qt.Equals, 2) @@ -1197,7 +1197,7 @@ func TestLib(t *testing.T) { lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) c.Assert(sheet.MaxCol, qt.Equals, 3) c.Assert(sheet.MaxRow, qt.Equals, 2) @@ -1345,7 +1345,7 @@ func TestLib(t *testing.T) { lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) row, err := sheet.Row(3) c.Assert(err, qt.Equals, nil) @@ -1440,7 +1440,7 @@ func TestReadRowsFromSheet(t *testing.T) { sheet, err := NewSheetWithCellStore("test", constructor) c.Assert(err, qt.IsNil) lt := make(hyperlinkTable) - err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, lt) + err = readRowsFromSheet(worksheet, file, sheet, NoRowLimit, NoColLimit, lt) c.Assert(err, qt.IsNil) row, err := sheet.Row(0) c.Assert(err, qt.Equals, nil)