Skip to content

Commit

Permalink
Fix TypeError in text-positioning operators Td and TD by safely parsing float (#1000)
Browse files Browse the repository at this point in the history
  • Loading branch information
pietermarsman authored Jul 9, 2024
1 parent 1aa6145 commit 88139ad
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- `ValueError` when corrupt PDF specifies a negative xref location ([#980](http://github.com/pdfminer/pdfminer.six/pull/980))
- `ValueError` when corrupt PDF specifies an invalid mediabox ([#987](https://github.com/pdfminer/pdfminer.six/pull/987))
- `RecursionError` when corrupt PDF specifies a recursive /Pages object ([#998](https://github.com/pdfminer/pdfminer.six/pull/998))
- `TypeError` when corrupt PDF specifies text-positioning operators with invalid values ([#1000](https://github.com/pdfminer/pdfminer.six/pull/1000))

### Removed

Expand Down
7 changes: 7 additions & 0 deletions pdfminer/casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,10 @@ def safe_int(o: Any) -> Optional[int]:
return int(o)
except (TypeError, ValueError):
return None


def safe_float(o: Any) -> Optional[float]:
    """Convert ``o`` to a float, returning None when that is not possible.

    :param o: any object, typically an operand read from a PDF content stream.
    :return: ``float(o)``, or None if the conversion raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return float(o)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() rejects ints that are too large for a double
        # (e.g. 10**400), which would otherwise escape this "safe" cast.
        return None
51 changes: 38 additions & 13 deletions pdfminer/pdfinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast

from pdfminer import settings
from pdfminer.casting import safe_float
from pdfminer.cmapdb import CMap, CMapBase, CMapDB
from pdfminer.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace
from pdfminer.pdfdevice import PDFDevice, PDFTextSeq
from pdfminer.pdfexceptions import PDFException
from pdfminer.pdfexceptions import PDFException, PDFValueError
from pdfminer.pdffont import (
PDFCIDFont,
PDFFont,
Expand Down Expand Up @@ -791,20 +792,44 @@ def do_Ts(self, rise: PDFStackT) -> None:
self.textstate.rise = cast(float, rise)

def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Move to the start of the next line.

    The new line start is offset from the current one by (tx, ty). Operands
    that cannot be converted to float leave the text matrix unchanged; when
    settings.STRICT is enabled they raise PDFValueError instead. The line
    matrix is reset to (0, 0) whenever no exception is raised.
    """
    dx = safe_float(tx)
    dy = safe_float(ty)

    if dx is None or dy is None:
        # Invalid operand: only an error in strict mode, otherwise ignored.
        if settings.STRICT:
            raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for Td")
    else:
        a, b, c, d, e, f = self.textstate.matrix
        # Translate the current matrix by (dx, dy) expressed in text space.
        self.textstate.matrix = (
            a,
            b,
            c,
            d,
            dx * a + dy * c + e,
            dx * b + dy * d + f,
        )

    self.textstate.linematrix = (0, 0)

def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
    """Move to the start of the next line and set the leading.

    Offset from the start of the current line by (tx, ty). As a side effect,
    this operator sets the leading parameter in the text state to ty.

    Operands that cannot be converted to float leave the text matrix
    unchanged; when settings.STRICT is enabled they raise PDFValueError
    instead.
    """
    tx_ = safe_float(tx)
    ty_ = safe_float(ty)

    if tx_ is not None and ty_ is not None:
        (a, b, c, d, e, f) = self.textstate.matrix
        e_new = tx_ * a + ty_ * c + e
        f_new = tx_ * b + ty_ * d + f
        self.textstate.matrix = (a, b, c, d, e_new, f_new)

    elif settings.STRICT:
        # Must be an f-string, otherwise the placeholders are printed
        # literally; the format mirrors the message raised by do_Td.
        raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for TD")

    # The leading depends on ty only, so it is still updated when just tx
    # is invalid (non-strict mode).
    if ty_ is not None:
        self.textstate.leading = ty_

    self.textstate.linematrix = (0, 0)

def do_Tm(
Expand Down Expand Up @@ -961,7 +986,7 @@ def execute(self, streams: Sequence[object]) -> None:
except PSEOF:
# empty page
return
while 1:
while True:
try:
(_, obj) = parser.nextobject()
except PSEOF:
Expand Down

0 comments on commit 88139ad

Please sign in to comment.