release v2.0.12

jorisschellekens · Oct 17, 2021 · 17e1208 · 17e1208
1 parent 9580de9
commit 17e1208
Show file tree

Hide file tree

Showing 270 changed files with 365 additions and 321 deletions.
diff --git a/borb/io/read/reference/xref_transformer.py b/borb/io/read/reference/xref_transformer.py
@@ -104,6 +104,11 @@ def transform(
 
             # build encryption handler
             v: int = int(xref["Trailer"]["Encrypt"].get("V", Decimal(0)))
+            r: int = int(xref["Trailer"]["Encrypt"]["R"])
+            if r != 2 and r != 3:
+                assert (
+                    False
+                ), "R is not 2 or 3. A number specifying which revision of the standard security handler shall be used to interpret this dictionary."
             if v == 0:
                 assert False, (
                     "V is 0. An algorithm that is undocumented. "

diff --git a/borb/io/read/types.py b/borb/io/read/types.py
@@ -529,9 +529,8 @@ class String:
     described in this sub-clause. Balanced pairs of parentheses within a string require no special treatment.
     """
 
-    def __init__(self, text: str, encoding: Optional["Encoding"] = None):  # type: ignore [name-defined]
+    def __init__(self, text: str):  # type: ignore [name-defined]
         self._text = text
-        self._encoding = encoding
         add_base_methods(self)
 
     def __eq__(self, other):
@@ -640,10 +639,7 @@ def get_value_bytes(self):
         This function returns the bytes that represent the content (as it was present in the PDF)
         of this String
         """
-        if self._encoding is None:
-            return [b for b in self.get_content_bytes()]
-        # TODO: password protected ??
-        return None
+        return [b for b in self.get_content_bytes()]
 
 
 class HexadecimalString(String):

diff --git a/borb/io/write/ascii_art/ascii_logo.txt b/borb/io/write/ascii_art/ascii_logo.txt
@@ -1,2 +1,2 @@
-borb version 2.0.11
+borb version 2.0.12
 Joris Schellekens
diff --git a/borb/io/write/document/document_transformer.py b/borb/io/write/document/document_transformer.py
@@ -94,18 +94,16 @@ def transform(
         )
 
         # set OutputIntents
-        object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")] = List()
+        # fmt: off
         rgb_output_intent: Dictionary = Dictionary()
         rgb_output_intent[Name("Type")] = Name("OutputIntent")
         rgb_output_intent[Name("S")] = Name("GTS_PDFA1")
         rgb_output_intent[Name("OutputConditionIdentifier")] = String("sRGB")
         rgb_output_intent[Name("RegistryName")] = String("http://www.color.org")
-        rgb_output_intent[Name("Info")] = String(
-            "Creator:HP Manufacturer:IEC Model:sRGB"
-        )
-        object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")].append(
-            rgb_output_intent
-        )
+        rgb_output_intent[Name("Info")] = String("Creator:HP Manufacturer:IEC Model:sRGB")
+        # object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")] = List()
+        # object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")].append(rgb_output_intent)
+        # fmt: on
 
         # transform XREF
         self.get_root_transformer().transform(object_to_transform["XRef"], context)

diff --git a/borb/pdf/canvas/layout/image/shape.py b/borb/pdf/canvas/layout/image/shape.py
@@ -7,6 +7,7 @@
 """
 import typing
 from decimal import Decimal
+from math import sqrt
 from typing import Tuple
 
 from borb.pdf.canvas.color.color import Color, HexColor, X11Color
@@ -29,6 +30,7 @@ def __init__(
         line_width: Decimal = Decimal(0),
         horizontal_alignment: Alignment = Alignment.LEFT,
         vertical_alignment: Alignment = Alignment.TOP,
+        auto_close_shape: bool = False,
     ):
         super(Shape, self).__init__(
             horizontal_alignment=horizontal_alignment,
@@ -41,6 +43,17 @@ def __init__(
         assert line_width >= Decimal(0)
         self._line_width = line_width
 
+        # close shape if desired (and needed)
+        if (
+            auto_close_shape
+            and sqrt(
+                (points[0][0] - points[-1][0]) ** 2
+                + (points[0][1] - points[-1][1]) ** 2
+            )
+            > 0.00000001
+        ):
+            points.append(points[0])
+
     def get_width(self) -> Decimal:
         """
         This function returns the width of this Shape
@@ -129,7 +142,7 @@ def _do_layout_without_padding(
             self._line_width,
         )
         content += "%f %f m " % (self._points[0][0], self._points[0][1])
-        for p in self._points:
+        for p in self._points[1:]:
             content += " %f %f l " % (p[0], p[1])
 
         operator: str = "B"

diff --git a/borb/pdf/canvas/layout/layout_element.py b/borb/pdf/canvas/layout/layout_element.py
@@ -10,7 +10,7 @@
 from decimal import Decimal
 from enum import Enum
 
-from borb.io.read.types import Decimal as pDecimal
+from borb.io.read.types import Decimal as pDecimal, Dictionary
 from borb.io.read.types import Name, Stream
 from borb.pdf.canvas.color.color import Color, HexColor
 from borb.pdf.canvas.geometry.rectangle import Rectangle
@@ -159,22 +159,38 @@ def get_bounding_box(self) -> typing.Optional[Rectangle]:
         return self.bounding_box
 
     def _initialize_page_content_stream(self, page: "Page"):  # type: ignore[name-defined]
-        if "Contents" in page:
-            return
 
         # build content stream object
-        content_stream = Stream()
-        content_stream[Name("DecodedBytes")] = b""
-        content_stream[Name("Bytes")] = zlib.compress(content_stream["DecodedBytes"], 9)
-        content_stream[Name("Filter")] = Name("FlateDecode")
-        content_stream[Name("Length")] = pDecimal(len(content_stream["Bytes"]))
+        if "Contents" not in page:
+            content_stream = Stream()
+            content_stream[Name("DecodedBytes")] = b""
+            content_stream[Name("Bytes")] = zlib.compress(
+                content_stream["DecodedBytes"], 9
+            )
+            content_stream[Name("Filter")] = Name("FlateDecode")
+            content_stream[Name("Length")] = pDecimal(len(content_stream["Bytes"]))
+
+            # set content of page
+            page[Name("Contents")] = content_stream
 
-        # set content of page
-        page[Name("Contents")] = content_stream
+        # set Resources
+        if "Resources" not in page:
+            page[Name("Resources")] = Dictionary()
 
     def _append_to_content_stream(self, page: "Page", instructions: str):  # type: ignore[name-defined]
         self._initialize_page_content_stream(page)
         content_stream = page["Contents"]
+
+        # prepend whitespace if needed
+        if len(content_stream[Name("DecodedBytes")]) != 0:
+            decoded_bytes_last_char: str = str(
+                content_stream["DecodedBytes"][-1:], encoding="latin1"
+            )
+            if decoded_bytes_last_char not in [" ", "\t", "\n"] and instructions[
+                0
+            ] not in [" ", "\t", "\n"]:
+                instructions = " " + instructions
+
         content_stream[Name("DecodedBytes")] += instructions.encode("latin1")
         content_stream[Name("Bytes")] = zlib.compress(content_stream["DecodedBytes"], 9)
         content_stream[Name("Length")] = pDecimal(len(content_stream["Bytes"]))

diff --git a/borb/pdf/canvas/operator/path_painting/close_and_stroke_path.py b/borb/pdf/canvas/operator/path_painting/close_and_stroke_path.py
@@ -6,7 +6,6 @@
 sequence h S.
 """
 import typing
-from typing import List
 
 from borb.io.read.types import AnyPDFType
 from borb.pdf.canvas.operator.canvas_operator import CanvasOperator

diff --git a/borb/pdf/pdf.py b/borb/pdf/pdf.py
@@ -9,9 +9,8 @@
     PDF was standardized as ISO 32000 in 2008, and no longer requires any royalties for its implementation.
 """
 import io
-from typing import List, Union
-
 import typing
+from typing import List, Union
 
 from borb.io.read.any_object_transformer import (
     AnyObjectTransformer as ReadAnyObjectTransformer,

diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/any_markdown_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/any_markdown_transformer.py
@@ -11,6 +11,9 @@
 from borb.toolkit.export.markdown_to_pdf.markdown_transformer.heading.horizontal_rule_transformer import (
     HorizontalRuleTransformer,
 )
+from borb.toolkit.export.markdown_to_pdf.markdown_transformer.image.image_transformer import (
+    ImageTransformer,
+)
 from borb.toolkit.export.markdown_to_pdf.markdown_transformer.list.ordered_list_transformer import (
     OrderedListTransformer,
 )
@@ -47,7 +50,8 @@ def __init__(self):
             .add_child_transformer(UnorderedListTransformer())          \
             .add_child_transformer(OrderedListTransformer())            \
             .add_child_transformer(TableTransformer())                  \
-            .add_child_transformer(ParagraphTransformer())
+            .add_child_transformer(ParagraphTransformer()) \
+            .add_child_transformer(ImageTransformer())
         # fmt: on
 
     def _can_transform(self, context: MarkdownTransformerState) -> bool:

diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/__init__.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/__init__.py
@@ -0,0 +1,43 @@
+"""
+    This file is part of the borb (R) project.
+    Copyright (c) 2020-2040 borb Group NV
+    Authors: Joris Schellekens, et al.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License version 3
+    as published by the Free Software Foundation with the addition of the
+    following permission added to Section 15 as permitted in Section 7(a):
+    FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
+    BORB GROUP. BORB GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
+    OF THIRD PARTY RIGHTS
+
+    This program is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+    or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program; if not, see http://www.gnu.org/licenses or write to
+    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+    Boston, MA, 02110-1301 USA.
+
+    The interactive user interfaces in modified source and object code versions
+    of this program must display Appropriate Legal Notices, as required under
+    Section 5 of the GNU Affero General Public License.
+    In accordance with Section 7(b) of the GNU Affero General Public License,
+    a covered work must retain the producer line in every PDF that is created
+    or manipulated using borb.
+
+    You can be released from the requirements of the license by purchasing
+    a commercial license. Buying such a license is mandatory as soon as you
+    develop commercial activities involving the borb software without
+    disclosing the source code of your own applications.
+
+    These activities include: offering paid services to customers as an ASP,
+    serving PDFs on the fly in a web application, shipping borb with a closed
+    source product.
+
+    For more information, please contact borb Software Corp. at this
+    address: [email protected]
+"""
diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/image_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/image_transformer.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+This implementation of BaseMarkdownTransformer handles images
+"""
+import re
+import typing
+from decimal import Decimal
+
+import requests
+from PIL import Image as PILImage  # type: ignore [import]
+
+from borb.pdf.canvas.layout.image.image import Image
+from borb.pdf.canvas.layout.page_layout.browser_layout import BrowserLayout
+from borb.pdf.page.page import Page
+from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import (
+    BaseMarkdownTransformer,
+    MarkdownTransformerState,
+)
+
+
+class ImageTransformer(BaseMarkdownTransformer):
+    """
+    This implementation of BaseMarkdownTransformer handles images
+    """
+
+    # markdown image syntax: ![alt text](url)
+    # (a raw string, so that the backslashes are not treated as string escapes)
+    _IMAGE_PATTERN = re.compile(r"!\[[^]]+\]\((?P<url>[^)]+)\)")
+
+    # number of seconds to wait on the remote server when downloading an image
+    _REQUEST_TIMEOUT: int = 30
+
+    @staticmethod
+    def _get_image_default_margins():
+        """
+        This function returns the (top, right, bottom, left) margins
+        of a newly constructed (throwaway) Image
+        """
+        pil_image = PILImage.new("RGB", (16, 16))
+        borb_image = Image(pil_image, width=Decimal(10), height=Decimal(10))
+        return (
+            borb_image.get_margin_top(),
+            borb_image.get_margin_right(),
+            borb_image.get_margin_bottom(),
+            borb_image.get_margin_left(),
+        )
+
+    @staticmethod
+    def _get_end_of_line(context: MarkdownTransformerState) -> int:
+        """
+        This function returns the index of the first newline after the current
+        position, or the length of the markdown string if there is no such newline
+        """
+        markdown: str = context.get_markdown_string()
+        end_of_line: int = markdown.find("\n", context.tell() + 1)
+        # str.find returns -1 if there is no newline; using -1 as a slice bound
+        # would drop the last character, and -1 + 1 would seek back to position 0
+        return len(markdown) if end_of_line == -1 else end_of_line
+
+    def _can_transform(self, context: MarkdownTransformerState) -> bool:
+        """
+        This function returns True if the text at the current position is a markdown image
+        """
+        markdown: str = context.get_markdown_string()
+        # startswith (rather than indexing) also copes with being at the end of the input
+        if not markdown.startswith("!", context.tell()):
+            return False
+        markdown_str: str = markdown[
+            context.tell() : ImageTransformer._get_end_of_line(context)
+        ]
+        return ImageTransformer._IMAGE_PATTERN.match(markdown_str) is not None
+
+    def _transform(self, context: MarkdownTransformerState) -> None:
+        """
+        This function downloads the image at the current position, scales it to the
+        space available in the parent layout element, adds it to that element,
+        and moves the context to the start of the next line
+        """
+
+        # get markdown string of current char -> end of line
+        markdown_str: str = context.get_markdown_string()[
+            context.tell() : ImageTransformer._get_end_of_line(context)
+        ]
+        assert len(markdown_str) > 0
+
+        # match against regex
+        match: typing.Optional[re.Match] = ImageTransformer._IMAGE_PATTERN.match(
+            markdown_str
+        )
+        assert match is not None
+
+        # extract (named group) url
+        url: str = match["url"]
+        assert len(url) > 0
+
+        # open raw image
+        # (the timeout keeps an unresponsive server from blocking the conversion forever)
+        image = PILImage.open(
+            requests.get(
+                url,
+                stream=True,
+                timeout=ImageTransformer._REQUEST_TIMEOUT,
+            ).raw
+        )
+
+        # get width and height
+        w: int = image.width
+        h: int = image.height
+
+        # determine max available width/height
+        margins: typing.Tuple[
+            Decimal, Decimal, Decimal, Decimal
+        ] = ImageTransformer._get_image_default_margins()
+        W: int = 128
+        H: int = 128
+
+        # Page
+        parent_element = context.get_parent_layout_element()
+        if isinstance(parent_element, Page):
+            W = int(parent_element.get_page_info().get_width() * Decimal(0.8))
+            H = int(parent_element.get_page_info().get_height() * Decimal(0.8))
+
+        # BrowserLayout
+        if isinstance(parent_element, BrowserLayout):
+            W = (
+                int(
+                    parent_element.get_page().get_page_info().get_width()
+                    - parent_element._horizontal_margin * Decimal(2)
+                )
+                - 1
+            )
+            H = (
+                int(
+                    parent_element.get_page().get_page_info().get_height()
+                    - parent_element._vertical_margin * Decimal(2)
+                )
+                - 1
+            )
+
+        # TODO: Table
+
+        # margin
+        W = W - int(margins[1]) - int(margins[3]) - 1
+        H = H - int(margins[0]) - int(margins[2]) - 1
+
+        # rescale
+        r: float = min(W / w, H / h)
+        w = int(w * r)
+        h = int(h * r)
+
+        # create and add Image
+        borb_image: Image = Image(image, width=Decimal(w), height=Decimal(h))
+        parent_element.add(borb_image)
+
+        # add remote go to annotation
+        # (best-effort: failing to add the link must not abort the transformation)
+        try:
+            parent_element.get_page().append_remote_go_to_annotation(
+                borb_image.get_bounding_box(), url
+            )
+        except Exception:
+            pass
+
+        # seek to the start of the next line (or the end of the input)
+        end_of_line: int = ImageTransformer._get_end_of_line(context)
+        context.seek(min(end_of_line + 1, len(context.get_markdown_string())))
diff --git a/borb/toolkit/text/regular_expression_text_extraction.py b/borb/toolkit/text/regular_expression_text_extraction.py
@@ -39,7 +39,7 @@ def __init__(
         self._page_nr: int = page_nr
         self._glyph_bounding_boxes: typing.List["Rectangle"] = glyph_bounding_boxes
         self._re_match: re.Match = re_match
-        # these fields are kept public to align with the existing python re.match object
+        # these fields are kept public to align with the existing python re.Match object
         self.pos = self._re_match.pos
         self.endpos = self._re_match.endpos
         self.lastindex = self._re_match.lastindex