Skip to content

Commit

Permalink
Merge pull request #8663 from radarhere/bigtiff
Browse files Browse the repository at this point in the history
  • Loading branch information
hugovk authored Jan 8, 2025
2 parents 93f45a3 + a8381c6 commit af3b904
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 45 deletions.
45 changes: 41 additions & 4 deletions Tests/test_file_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,16 @@ def test_bigtiff(self, tmp_path: Path) -> None:

def test_bigtiff_save(self, tmp_path: Path) -> None:
outfile = str(tmp_path / "temp.tif")
hopper().save(outfile, big_tiff=True)
im = hopper()
im.save(outfile, big_tiff=True)

with Image.open(outfile) as im:
assert im.tag_v2._bigtiff is True
with Image.open(outfile) as reloaded:
assert reloaded.tag_v2._bigtiff is True

im.save(outfile, save_all=True, append_images=[im], big_tiff=True)

with Image.open(outfile) as reloaded:
assert reloaded.tag_v2._bigtiff is True

def test_seek_too_large(self) -> None:
with pytest.raises(ValueError, match="Unable to seek to frame"):
Expand Down Expand Up @@ -740,7 +746,7 @@ def im_generator(ims: list[Image.Image]) -> Generator[Image.Image, None, None]:
assert reread.n_frames == 3

def test_fixoffsets(self) -> None:
b = BytesIO(b"II\x2a\x00\x00\x00\x00\x00")
b = BytesIO(b"II\x2A\x00\x00\x00\x00\x00")
with TiffImagePlugin.AppendingTiffWriter(b) as a:
b.seek(0)
a.fixOffsets(1, isShort=True)
Expand All @@ -753,6 +759,37 @@ def test_fixoffsets(self) -> None:
with pytest.raises(RuntimeError):
a.fixOffsets(1)

b = BytesIO(b"II\x2A\x00\x00\x00\x00\x00")
with TiffImagePlugin.AppendingTiffWriter(b) as a:
a.offsetOfNewPage = 2**16

b.seek(0)
a.fixOffsets(1, isShort=True)

b = BytesIO(b"II\x2B\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
with TiffImagePlugin.AppendingTiffWriter(b) as a:
a.offsetOfNewPage = 2**32

b.seek(0)
a.fixOffsets(1, isShort=True)

b.seek(0)
a.fixOffsets(1, isLong=True)

def test_appending_tiff_writer_writelong(self) -> None:
data = b"II\x2A\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
b = BytesIO(data)
with TiffImagePlugin.AppendingTiffWriter(b) as a:
a.writeLong(2**32 - 1)
assert b.getvalue() == data + b"\xff\xff\xff\xff"

def test_appending_tiff_writer_rewritelastshorttolong(self) -> None:
data = b"II\x2A\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
b = BytesIO(data)
with TiffImagePlugin.AppendingTiffWriter(b) as a:
a.rewriteLastShortToLong(2**32 - 1)
assert b.getvalue() == data[:-2] + b"\xff\xff\xff\xff"

def test_saving_icc_profile(self, tmp_path: Path) -> None:
# Tests saving TIFF with icc_profile set.
# At the time of writing this will only work for non-compressed tiffs
Expand Down
108 changes: 67 additions & 41 deletions src/PIL/TiffImagePlugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,13 +962,16 @@ def tobytes(self, offset: int = 0) -> bytes:
result = self._pack("Q" if self._bigtiff else "H", len(self._tags_v2))

entries: list[tuple[int, int, int, bytes, bytes]] = []
offset += len(result) + len(self._tags_v2) * (20 if self._bigtiff else 12) + 4

fmt = "Q" if self._bigtiff else "L"
fmt_size = 8 if self._bigtiff else 4
offset += (
len(result) + len(self._tags_v2) * (20 if self._bigtiff else 12) + fmt_size
)
stripoffsets = None

# pass 1: convert tags to binary format
# always write tags in ascending order
fmt = "Q" if self._bigtiff else "L"
fmt_size = 8 if self._bigtiff else 4
for tag, value in sorted(self._tags_v2.items()):
if tag == STRIPOFFSETS:
stripoffsets = len(entries)
Expand Down Expand Up @@ -1024,7 +1027,7 @@ def tobytes(self, offset: int = 0) -> bytes:
)

# -- overwrite here for multi-page --
result += b"\0\0\0\0" # end of entries
result += self._pack(fmt, 0) # end of entries

# pass 3: write auxiliary data to file
for tag, typ, count, value, data in entries:
Expand Down Expand Up @@ -2044,20 +2047,21 @@ def setup(self) -> None:
self.offsetOfNewPage = 0

self.IIMM = iimm = self.f.read(4)
self._bigtiff = b"\x2B" in iimm
if not iimm:
# empty file - first page
self.isFirst = True
return

self.isFirst = False
if iimm == b"II\x2a\x00":
self.setEndian("<")
elif iimm == b"MM\x00\x2a":
self.setEndian(">")
else:
if iimm not in PREFIXES:
msg = "Invalid TIFF file header"
raise RuntimeError(msg)

self.setEndian("<" if iimm.startswith(II) else ">")

if self._bigtiff:
self.f.seek(4, os.SEEK_CUR)
self.skipIFDs()
self.goToEnd()

Expand All @@ -2077,11 +2081,13 @@ def finalize(self) -> None:
msg = "IIMM of new page doesn't match IIMM of first page"
raise RuntimeError(msg)

ifd_offset = self.readLong()
if self._bigtiff:
self.f.seek(4, os.SEEK_CUR)
ifd_offset = self._read(8 if self._bigtiff else 4)
ifd_offset += self.offsetOfNewPage
assert self.whereToWriteNewIFDOffset is not None
self.f.seek(self.whereToWriteNewIFDOffset)
self.writeLong(ifd_offset)
self._write(ifd_offset, 8 if self._bigtiff else 4)
self.f.seek(ifd_offset)
self.fixIFD()

Expand Down Expand Up @@ -2127,18 +2133,20 @@ def setEndian(self, endian: str) -> None:
self.endian = endian
self.longFmt = f"{self.endian}L"
self.shortFmt = f"{self.endian}H"
self.tagFormat = f"{self.endian}HHL"
self.tagFormat = f"{self.endian}HH" + ("Q" if self._bigtiff else "L")

def skipIFDs(self) -> None:
while True:
ifd_offset = self.readLong()
ifd_offset = self._read(8 if self._bigtiff else 4)
if ifd_offset == 0:
self.whereToWriteNewIFDOffset = self.f.tell() - 4
self.whereToWriteNewIFDOffset = self.f.tell() - (
8 if self._bigtiff else 4
)
break

self.f.seek(ifd_offset)
num_tags = self.readShort()
self.f.seek(num_tags * 12, os.SEEK_CUR)
num_tags = self._read(8 if self._bigtiff else 2)
self.f.seek(num_tags * (20 if self._bigtiff else 12), os.SEEK_CUR)

def write(self, data: Buffer, /) -> int:
return self.f.write(data)
Expand Down Expand Up @@ -2168,81 +2176,99 @@ def _verify_bytes_written(bytes_written: int | None, expected: int) -> None:
msg = f"wrote only {bytes_written} bytes but wanted {expected}"
raise RuntimeError(msg)

def rewriteLastShortToLong(self, value: int) -> None:
self.f.seek(-2, os.SEEK_CUR)
bytes_written = self.f.write(struct.pack(self.longFmt, value))
self._verify_bytes_written(bytes_written, 4)

def _rewriteLast(self, value: int, field_size: int) -> None:
def _rewriteLast(
self, value: int, field_size: int, new_field_size: int = 0
) -> None:
self.f.seek(-field_size, os.SEEK_CUR)
if not new_field_size:
new_field_size = field_size
bytes_written = self.f.write(
struct.pack(self.endian + self._fmt(field_size), value)
struct.pack(self.endian + self._fmt(new_field_size), value)
)
self._verify_bytes_written(bytes_written, field_size)
self._verify_bytes_written(bytes_written, new_field_size)

def rewriteLastShortToLong(self, value: int) -> None:
self._rewriteLast(value, 2, 4)

def rewriteLastShort(self, value: int) -> None:
return self._rewriteLast(value, 2)

def rewriteLastLong(self, value: int) -> None:
return self._rewriteLast(value, 4)

def _write(self, value: int, field_size: int) -> None:
bytes_written = self.f.write(
struct.pack(self.endian + self._fmt(field_size), value)
)
self._verify_bytes_written(bytes_written, field_size)

def writeShort(self, value: int) -> None:
bytes_written = self.f.write(struct.pack(self.shortFmt, value))
self._verify_bytes_written(bytes_written, 2)
self._write(value, 2)

def writeLong(self, value: int) -> None:
bytes_written = self.f.write(struct.pack(self.longFmt, value))
self._verify_bytes_written(bytes_written, 4)
self._write(value, 4)

def close(self) -> None:
self.finalize()
if self.close_fp:
self.f.close()

def fixIFD(self) -> None:
num_tags = self.readShort()
num_tags = self._read(8 if self._bigtiff else 2)

for i in range(num_tags):
tag, field_type, count = struct.unpack(self.tagFormat, self.f.read(8))
tag, field_type, count = struct.unpack(
self.tagFormat, self.f.read(12 if self._bigtiff else 8)
)

field_size = self.fieldSizes[field_type]
total_size = field_size * count
is_local = total_size <= 4
fmt_size = 8 if self._bigtiff else 4
is_local = total_size <= fmt_size
if not is_local:
offset = self.readLong() + self.offsetOfNewPage
self.rewriteLastLong(offset)
offset = self._read(fmt_size) + self.offsetOfNewPage
self._rewriteLast(offset, fmt_size)

if tag in self.Tags:
cur_pos = self.f.tell()

if is_local:
self._fixOffsets(count, field_size)
self.f.seek(cur_pos + 4)
self.f.seek(cur_pos + fmt_size)
else:
self.f.seek(offset)
self._fixOffsets(count, field_size)
self.f.seek(cur_pos)

elif is_local:
# skip the locally stored value that is not an offset
self.f.seek(4, os.SEEK_CUR)
self.f.seek(fmt_size, os.SEEK_CUR)

def _fixOffsets(self, count: int, field_size: int) -> None:
for i in range(count):
offset = self._read(field_size)
offset += self.offsetOfNewPage
if field_size == 2 and offset >= 65536:
# offset is now too large - we must convert shorts to longs

new_field_size = 0
if self._bigtiff and field_size in (2, 4) and offset >= 2**32:
# offset is now too large - we must convert long to long8
new_field_size = 8
elif field_size == 2 and offset >= 2**16:
# offset is now too large - we must convert short to long
new_field_size = 4
if new_field_size:
if count != 1:
msg = "not implemented"
raise RuntimeError(msg) # XXX TODO

# simple case - the offset is just one and therefore it is
# local (not referenced with another offset)
self.rewriteLastShortToLong(offset)
self.f.seek(-10, os.SEEK_CUR)
self.writeShort(TiffTags.LONG) # rewrite the type to LONG
self.f.seek(8, os.SEEK_CUR)
self._rewriteLast(offset, field_size, new_field_size)
# Move back past the new offset, past 'count', and before 'field_type'
rewind = -new_field_size - 4 - 2
self.f.seek(rewind, os.SEEK_CUR)
self.writeShort(new_field_size) # rewrite the type
self.f.seek(2 - rewind, os.SEEK_CUR)
else:
self._rewriteLast(offset, field_size)

Expand Down

0 comments on commit af3b904

Please sign in to comment.