Skip to content

Commit

Permalink
Merge pull request #787 from chinapandaman/PPF-786
Browse files Browse the repository at this point in the history
PPF-786: implement use full widget name
  • Loading branch information
chinapandaman authored Jan 16, 2025
2 parents 3fcb2d4 + a7935ff commit c233015
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 4 deletions.
1 change: 1 addition & 0 deletions PyPDFForm/middleware/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(

super().__init__()
self._name = name
self.full_name = None
self._value = value
self.desc = None

Expand Down
21 changes: 19 additions & 2 deletions PyPDFForm/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from reportlab.pdfbase.pdfmetrics import stringWidth

from .constants import (COMB, DEFAULT_FONT_SIZE, MULTILINE, NEW_LINE_SYMBOL,
WIDGET_TYPES, Annots, MaxLen, Rect)
WIDGET_TYPES, Annots, MaxLen, Rect, Parent, T)
from .font import (adjust_paragraph_font_size, adjust_text_field_font_size,
auto_detect_font, get_text_field_font_color,
get_text_field_font_size, text_field_font_size)
Expand Down Expand Up @@ -43,7 +43,7 @@ def set_character_x_paddings(
return widgets


def build_widgets(pdf_stream: bytes) -> Dict[str, WIDGET_TYPES]:
def build_widgets(pdf_stream: bytes, use_full_widget_name: bool) -> Dict[str, WIDGET_TYPES]:
"""Builds a widget dict given a PDF form stream."""

results = {}
Expand All @@ -53,6 +53,7 @@ def build_widgets(pdf_stream: bytes) -> Dict[str, WIDGET_TYPES]:
key = get_widget_key(widget)
_widget = construct_widget(widget, key)
if _widget is not None:
_widget.full_name = get_widget_full_key(widget)
_widget.desc = get_widget_description(widget)
if isinstance(_widget, Text):
_widget.max_length = get_text_field_max_length(widget)
Expand All @@ -73,6 +74,8 @@ def build_widgets(pdf_stream: bytes) -> Dict[str, WIDGET_TYPES]:
continue

results[key] = _widget
if _widget.full_name is not None and use_full_widget_name:
results[_widget.full_name] = results[key]
return results


Expand Down Expand Up @@ -190,6 +193,20 @@ def get_widget_key(widget: dict) -> Union[str, list, None]:
return result


def get_widget_full_key(widget: dict) -> Union[str, None]:
    """
    Builds a PDF widget's full key in the form
    ``<parent T entry>.<widget key>``.

    Returns None when the widget has no ``Parent`` entry, when the
    parent has no ``T`` entry, or when the parent's ``T`` entry is
    equal to the widget's own key.
    """

    partial_key = get_widget_key(widget)

    if Parent not in widget:
        return None

    parent = widget[Parent]
    if T not in parent.get_object():
        return None

    parent_key = parent[T]
    # A parent whose T equals the widget's key adds nothing to prepend,
    # so no full key is produced in that case.
    if parent_key != partial_key:
        return f"{parent_key}.{partial_key}"

    return None


def get_widget_alignment(widget: dict) -> Union[str, list, None]:
"""Finds a PDF widget's alignment by pattern matching."""

Expand Down
12 changes: 10 additions & 2 deletions PyPDFForm/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def fill(
) -> FormWrapper:
"""Fills a PDF form."""

widgets = build_widgets(self.stream) if self.stream else {}
widgets = build_widgets(self.stream, False) if self.stream else {}

for key, value in data.items():
if key in widgets:
Expand Down Expand Up @@ -86,13 +86,15 @@ def __init__(
self.global_font_size = kwargs.get("global_font_size")
self.global_font_color = kwargs.get("global_font_color")

self.use_full_widget_name = kwargs.get("use_full_widget_name", False)

self._init_helper()

def _init_helper(self, key_to_refresh: str = None) -> None:
"""Updates all attributes when the state of the PDF stream changes."""

refresh_not_needed = {}
new_widgets = build_widgets(self.read()) if self.read() else {}
new_widgets = build_widgets(self.read(), self.use_full_widget_name) if self.read() else {}
for k, v in self.widgets.items():
if k in new_widgets:
new_widgets[k] = v
Expand Down Expand Up @@ -254,6 +256,9 @@ def update_widget_key(
) -> PdfWrapper:
"""Updates the key of an existed widget on a PDF form."""

if self.use_full_widget_name:
raise NotImplementedError

if defer:
self._keys_to_update.append((old_key, new_key, index))
return self
Expand All @@ -268,6 +273,9 @@ def update_widget_key(
def commit_widget_key_updates(self) -> PdfWrapper:
"""Commits all deferred widget key updates on a PDF form."""

if self.use_full_widget_name:
raise NotImplementedError

old_keys = [each[0] for each in self._keys_to_update]
new_keys = [each[1] for each in self._keys_to_update]
indices = [each[2] for each in self._keys_to_update]
Expand Down
19 changes: 19 additions & 0 deletions docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,25 @@ with open("sample_template.pdf", "rb+") as template:
This adaptation is universal across all APIs of PyPDFForm. So in later sections of the documentation, whenever you see
a function parameter that's a file path, you can safely switch it for a file object or file stream.

## Use full widget name in PDF wrapper (beta)

**NOTE:** This is a beta feature, meaning it still needs to be tested against more PDF forms and may not work for
some of them.

According to section 12.7.3.2 found on page 434 of [the PDF standard](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf), each PDF form widget can have a fully qualified name that is not explicitly defined but can be constructed following the pattern `<parent_widget_name>.<widget_name>`.

PyPDFForm supports accessing widgets through their full names by simply setting the optional parameter `use_full_widget_name` to `True` when a `PdfWrapper` object is instantiated. Consider [this PDF](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/sample_template_with_full_key.pdf):

```python
from PyPDFForm import PdfWrapper

pdf = PdfWrapper("sample_template_with_full_key.pdf", use_full_widget_name=True)
```

The checkbox widget on the second page with the text `Gain de 2 classes` has a partial name of `0` and a full name of `Gain de 2 classes.0`. By constructing the object as shown above, you can access the same checkbox through both the partial name and the full name.

**NOTE:** Because each full widget name involves both the widget itself and its parent widget, the methods `update_widget_key` and `commit_widget_key_updates` are disabled and will raise a `NotImplementedError` when invoked through an object that uses full widget names.

## Write to a file

Lastly, `PdfWrapper` also behaves like an open file object. So you can write the PDF it holds to another
Expand Down
Binary file added pdf_samples/sample_template_with_full_key.pdf
Binary file not shown.
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ def sample_template_with_image_field(pdf_samples):
return f.read()


@pytest.fixture
def sample_template_with_full_key(pdf_samples):
    """Returns the bytes of sample_template_with_full_key.pdf under pdf_samples."""

    pdf_path = os.path.join(pdf_samples, "sample_template_with_full_key.pdf")
    with open(pdf_path, "rb+") as stream:
        return stream.read()


@pytest.fixture
def dropdown_alignment(pdf_samples):
with open(
Expand Down
52 changes: 52 additions & 0 deletions tests/test_use_full_widget_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-

import pytest

from PyPDFForm import PdfWrapper


def test_init(sample_template_with_full_key):
    """The full name is registered and maps to the same widget as the partial name."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)

    assert "Gain de 2 classes.0" in wrapper.widgets
    assert wrapper.widgets["Gain de 2 classes.0"] is wrapper.widgets["0"]


def test_sample_data(sample_template_with_full_key):
    """Sample data contains the full name with the same value as the partial name."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)

    assert "Gain de 2 classes.0" in wrapper.sample_data
    assert wrapper.sample_data["Gain de 2 classes.0"] == wrapper.sample_data["0"]


def test_fill(sample_template_with_full_key):
    """Filling by full name produces the same PDF as filling by partial name."""

    by_full_name = PdfWrapper(
        sample_template_with_full_key, use_full_widget_name=True
    )
    by_partial_name = PdfWrapper(
        sample_template_with_full_key, use_full_widget_name=True
    )

    full_name_output = by_full_name.fill({"Gain de 2 classes.0": True}).read()
    partial_name_output = by_partial_name.fill({"0": True}).read()

    assert full_name_output == partial_name_output


def test_update_widget_key(sample_template_with_full_key):
    """update_widget_key raises NotImplementedError when full widget names are on."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)

    with pytest.raises(NotImplementedError):
        wrapper.update_widget_key("0", "foo")


def test_commit_widget_key_updates(sample_template_with_full_key):
    """commit_widget_key_updates raises NotImplementedError when full widget names are on."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)

    with pytest.raises(NotImplementedError):
        wrapper.commit_widget_key_updates()


def test_schema(sample_template_with_full_key):
    """The schema lists the full name with the same property as the partial name."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)

    assert "Gain de 2 classes.0" in wrapper.schema["properties"]
    assert (
        wrapper.schema["properties"]["Gain de 2 classes.0"]
        == wrapper.schema["properties"]["0"]
    )

0 comments on commit c233015

Please sign in to comment.