diff --git a/CHANGELOG.md b/CHANGELOG.md index eb994507..4284aa4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - `ValueError` wrong error message when specifying codec for text output ([#902](https://github.com/pdfminer/pdfminer.six/pull/902)) - Resolve stream filter parameters ([#906](https://github.com/pdfminer/pdfminer.six/pull/906)) - Reading cmap's with whitespace in the name ([#935](https://github.com/pdfminer/pdfminer.six/pull/935)) +- Optimize `apply_png_predictor` by using lists ([#912](https://github.com/pdfminer/pdfminer.six/pull/912)) ## [20231228] diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 59cf5cd3..0afdcdf1 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -138,16 +138,16 @@ def apply_png_predictor( nbytes = colors * columns * bitspercomponent // 8 bpp = colors * bitspercomponent // 8 # number of bytes per complete pixel - buf = b"" - line_above = b"\x00" * columns + buf = [] + line_above = list(b"\x00" * columns) for scanline_i in range(0, len(data), nbytes + 1): filter_type = data[scanline_i] line_encoded = data[scanline_i + 1 : scanline_i + 1 + nbytes] - raw = b"" + raw = [] if filter_type == 0: # Filter type 0: None - raw += line_encoded + raw = list(line_encoded) elif filter_type == 1: # Filter type 1: Sub @@ -162,7 +162,7 @@ def apply_png_predictor( else: raw_x_bpp = int(raw[j - bpp]) raw_x = (sub_x + raw_x_bpp) & 255 - raw += bytes((raw_x,)) + raw.append(raw_x) elif filter_type == 2: # Filter type 2: Up @@ -173,7 +173,7 @@ def apply_png_predictor( # the prior scanline. for (up_x, prior_x) in zip(line_encoded, line_above): raw_x = (up_x + prior_x) & 255 - raw += bytes((raw_x,)) + raw.append(raw_x) elif filter_type == 3: # Filter type 3: Average @@ -191,7 +191,7 @@ def apply_png_predictor( raw_x_bpp = int(raw[j - bpp]) prior_x = int(line_above[j]) raw_x = (average_x + (raw_x_bpp + prior_x) // 2) & 255 - raw += bytes((raw_x,)) + raw.append(raw_x) elif filter_type == 4: # Filter type 4: Paeth @@ -212,14 +212,14 @@ def apply_png_predictor( prior_x = int(line_above[j]) paeth = paeth_predictor(raw_x_bpp, prior_x, prior_x_bpp) raw_x = (paeth_x + paeth) & 255 - raw += bytes((raw_x,)) + raw.append(raw_x) else: raise ValueError("Unsupported predictor value: %d" % filter_type) - buf += raw + buf.extend(raw) line_above = raw - return buf + return bytes(buf) Point = Tuple[float, float]