Skip to content

Commit

Permalink
release v2.0.12
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisschellekens committed Oct 17, 2021
1 parent 9580de9 commit 17e1208
Show file tree
Hide file tree
Showing 270 changed files with 365 additions and 321 deletions.
5 changes: 5 additions & 0 deletions borb/io/read/reference/xref_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ def transform(

# build encryption handler
v: int = int(xref["Trailer"]["Encrypt"].get("V", Decimal(0)))
r: int = int(xref["Trailer"]["Encrypt"]["R"])
if r != 2 and r != 3:
assert (
False
), "R is not 2 or 3. A number specifying which revision of the standard security handler shall be used to interpret this dictionary."
if v == 0:
assert False, (
"V is 0. An algorithm that is undocumented. "
Expand Down
8 changes: 2 additions & 6 deletions borb/io/read/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,9 +529,8 @@ class String:
described in this sub-clause. Balanced pairs of parentheses within a string require no special treatment.
"""

def __init__(self, text: str, encoding: Optional["Encoding"] = None): # type: ignore [name-defined]
def __init__(self, text: str): # type: ignore [name-defined]
self._text = text
self._encoding = encoding
add_base_methods(self)

def __eq__(self, other):
Expand Down Expand Up @@ -640,10 +639,7 @@ def get_value_bytes(self):
This function returns the bytes that represent the content (as it was present in the PDF)
of this String
"""
if self._encoding is None:
return [b for b in self.get_content_bytes()]
# TODO: password protected ??
return None
return [b for b in self.get_content_bytes()]


class HexadecimalString(String):
Expand Down
2 changes: 1 addition & 1 deletion borb/io/write/ascii_art/ascii_logo.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
borb version 2.0.11
borb version 2.0.12
Joris Schellekens
12 changes: 5 additions & 7 deletions borb/io/write/document/document_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,16 @@ def transform(
)

# set OutputIntents
object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")] = List()
# fmt: off
rgb_output_intent: Dictionary = Dictionary()
rgb_output_intent[Name("Type")] = Name("OutputIntent")
rgb_output_intent[Name("S")] = Name("GTS_PDFA1")
rgb_output_intent[Name("OutputConditionIdentifier")] = String("sRGB")
rgb_output_intent[Name("RegistryName")] = String("http://www.color.org")
rgb_output_intent[Name("Info")] = String(
"Creator:HP Manufacturer:IEC Model:sRGB"
)
object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")].append(
rgb_output_intent
)
rgb_output_intent[Name("Info")] = String("Creator:HP Manufacturer:IEC Model:sRGB")
# object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")] = List()
# object_to_transform["XRef"]["Trailer"]["Root"][Name("OutputIntents")].append(rgb_output_intent)
# fmt: on

# transform XREF
self.get_root_transformer().transform(object_to_transform["XRef"], context)
Expand Down
15 changes: 14 additions & 1 deletion borb/pdf/canvas/layout/image/shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""
import typing
from decimal import Decimal
from math import sqrt
from typing import Tuple

from borb.pdf.canvas.color.color import Color, HexColor, X11Color
Expand All @@ -29,6 +30,7 @@ def __init__(
line_width: Decimal = Decimal(0),
horizontal_alignment: Alignment = Alignment.LEFT,
vertical_alignment: Alignment = Alignment.TOP,
auto_close_shape: bool = False,
):
super(Shape, self).__init__(
horizontal_alignment=horizontal_alignment,
Expand All @@ -41,6 +43,17 @@ def __init__(
assert line_width >= Decimal(0)
self._line_width = line_width

# close shape if desired (and needed)
if (
auto_close_shape
and sqrt(
(points[0][0] - points[-1][0]) ** 2
+ (points[0][1] - points[-1][1]) ** 2
)
> 0.00000001
):
points.append(points[0])

def get_width(self) -> Decimal:
"""
This function returns the width of this Shape
Expand Down Expand Up @@ -129,7 +142,7 @@ def _do_layout_without_padding(
self._line_width,
)
content += "%f %f m " % (self._points[0][0], self._points[0][1])
for p in self._points:
for p in self._points[1:]:
content += " %f %f l " % (p[0], p[1])

operator: str = "B"
Expand Down
36 changes: 26 additions & 10 deletions borb/pdf/canvas/layout/layout_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from decimal import Decimal
from enum import Enum

from borb.io.read.types import Decimal as pDecimal
from borb.io.read.types import Decimal as pDecimal, Dictionary
from borb.io.read.types import Name, Stream
from borb.pdf.canvas.color.color import Color, HexColor
from borb.pdf.canvas.geometry.rectangle import Rectangle
Expand Down Expand Up @@ -159,22 +159,38 @@ def get_bounding_box(self) -> typing.Optional[Rectangle]:
return self.bounding_box

def _initialize_page_content_stream(self, page: "Page"): # type: ignore[name-defined]
if "Contents" in page:
return

# build content stream object
content_stream = Stream()
content_stream[Name("DecodedBytes")] = b""
content_stream[Name("Bytes")] = zlib.compress(content_stream["DecodedBytes"], 9)
content_stream[Name("Filter")] = Name("FlateDecode")
content_stream[Name("Length")] = pDecimal(len(content_stream["Bytes"]))
if "Contents" not in page:
content_stream = Stream()
content_stream[Name("DecodedBytes")] = b""
content_stream[Name("Bytes")] = zlib.compress(
content_stream["DecodedBytes"], 9
)
content_stream[Name("Filter")] = Name("FlateDecode")
content_stream[Name("Length")] = pDecimal(len(content_stream["Bytes"]))

# set content of page
page[Name("Contents")] = content_stream

# set content of page
page[Name("Contents")] = content_stream
# set Resources
if "Resources" not in page:
page[Name("Resources")] = Dictionary()

def _append_to_content_stream(self, page: "Page", instructions: str):  # type: ignore[name-defined]
    """
    This function appends the given content-stream instructions to the
    content stream of the given Page, creating that stream first if needed.
    A single space is inserted between the existing content and the new
    instructions when neither side supplies whitespace, so that two operators
    can not be glued together into one token.
    :param page:            the Page whose content stream is modified
    :param instructions:    the instructions to append (encoded as latin1)
    """
    self._initialize_page_content_stream(page)
    content_stream = page["Contents"]

    # prepend whitespace if needed
    # (an empty instructions string has no first character to inspect,
    #  indexing it would raise an IndexError)
    decoded_bytes: bytes = content_stream[Name("DecodedBytes")]
    if len(decoded_bytes) != 0 and len(instructions) != 0:
        whitespace_chars: typing.Tuple[str, ...] = (" ", "\t", "\n")
        decoded_bytes_last_char: str = str(decoded_bytes[-1:], encoding="latin1")
        if (
            decoded_bytes_last_char not in whitespace_chars
            and instructions[0] not in whitespace_chars
        ):
            instructions = " " + instructions

    # the compressed bytes and their length are derived from the decoded
    # bytes, so both are recomputed after every append
    content_stream[Name("DecodedBytes")] += instructions.encode("latin1")
    content_stream[Name("Bytes")] = zlib.compress(content_stream["DecodedBytes"], 9)
    content_stream[Name("Length")] = pDecimal(len(content_stream["Bytes"]))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
sequence h S.
"""
import typing
from typing import List

from borb.io.read.types import AnyPDFType
from borb.pdf.canvas.operator.canvas_operator import CanvasOperator
Expand Down
3 changes: 1 addition & 2 deletions borb/pdf/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
PDF was standardized as ISO 32000 in 2008, and no longer requires any royalties for its implementation.
"""
import io
from typing import List, Union

import typing
from typing import List, Union

from borb.io.read.any_object_transformer import (
AnyObjectTransformer as ReadAnyObjectTransformer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from borb.toolkit.export.markdown_to_pdf.markdown_transformer.heading.horizontal_rule_transformer import (
HorizontalRuleTransformer,
)
from borb.toolkit.export.markdown_to_pdf.markdown_transformer.image.image_transformer import (
ImageTransformer,
)
from borb.toolkit.export.markdown_to_pdf.markdown_transformer.list.ordered_list_transformer import (
OrderedListTransformer,
)
Expand Down Expand Up @@ -47,7 +50,8 @@ def __init__(self):
.add_child_transformer(UnorderedListTransformer()) \
.add_child_transformer(OrderedListTransformer()) \
.add_child_transformer(TableTransformer()) \
.add_child_transformer(ParagraphTransformer())
.add_child_transformer(ParagraphTransformer()) \
.add_child_transformer(ImageTransformer())
# fmt: on

def _can_transform(self, context: MarkdownTransformerState) -> bool:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
This file is part of the borb (R) project.
Copyright (c) 2020-2040 borb Group NV
Authors: Joris Schellekens, et al.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License version 3
as published by the Free Software Foundation with the addition of the
following permission added to Section 15 as permitted in Section 7(a):
FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
BORB GROUP. BORB GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
OF THIRD PARTY RIGHTS
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program; if not, see http://www.gnu.org/licenses or write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA, 02110-1301 USA.
The interactive user interfaces in modified source and object code versions
of this program must display Appropriate Legal Notices, as required under
Section 5 of the GNU Affero General Public License.
In accordance with Section 7(b) of the GNU Affero General Public License,
a covered work must retain the producer line in every PDF that is created
or manipulated using borb.
You can be released from the requirements of the license by purchasing
a commercial license. Buying such a license is mandatory as soon as you
develop commercial activities involving the borb software without
disclosing the source code of your own applications.
These activities include: offering paid services to customers as an ASP,
serving PDFs on the fly in a web application, shipping borb with a closed
source product.
For more information, please contact borb Software Corp. at this
address: [email protected]
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This implementation of BaseMarkdownTransformer handles images
"""
import re
import typing
from decimal import Decimal

import requests
from PIL import Image as PILImage # type: ignore [import]

from borb.pdf.canvas.layout.image.image import Image
from borb.pdf.canvas.layout.page_layout.browser_layout import BrowserLayout
from borb.pdf.page.page import Page
from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import (
BaseMarkdownTransformer,
MarkdownTransformerState,
)


class ImageTransformer(BaseMarkdownTransformer):
    """
    This implementation of BaseMarkdownTransformer handles images

    It recognises the markdown image syntax ``![alt text](url)`` at the
    current position of the markdown input, downloads the image from the url,
    scales it to fit the parent layout element and adds it to that element.
    """

    # Raw string: "\[" and "\(" must reach the regex engine untouched instead
    # of being treated as (invalid) string escape sequences.
    # Compiled once, shared by _can_transform and _transform.
    _IMAGE_PATTERN = re.compile(r"!\[[^]]+\]\((?P<url>[^)]+)\)")

    # Without a timeout requests waits indefinitely on an unresponsive server.
    _DOWNLOAD_TIMEOUT_IN_SECONDS = 30

    # Box (in user space units) used when the parent layout element is
    # neither a Page nor a BrowserLayout.
    _DEFAULT_MAX_WIDTH = 128
    _DEFAULT_MAX_HEIGHT = 128

    @staticmethod
    def _get_image_default_margins():
        """
        This function returns the default margins (top, right, bottom, left)
        of an Image, by building a small throw-away Image and reading them back.
        """
        pil_image = PILImage.new("RGB", (16, 16))
        borb_image = Image(pil_image, width=Decimal(10), height=Decimal(10))
        return (
            borb_image.get_margin_top(),
            borb_image.get_margin_right(),
            borb_image.get_margin_bottom(),
            borb_image.get_margin_left(),
        )

    @staticmethod
    def _get_current_line(context: MarkdownTransformerState) -> str:
        """
        This function returns the markdown text from the current position up
        to (not including) the next newline character.
        """
        markdown: str = context.get_markdown_string()
        start: int = context.tell()
        end: int = markdown.find("\n", start + 1)
        # str.find returns -1 when there is no further newline; slicing with
        # -1 would silently drop the last character of the input
        if end == -1:
            end = len(markdown)
        return markdown[start:end]

    def _can_transform(self, context: MarkdownTransformerState) -> bool:
        """
        This function returns True if the text at the current position is a
        markdown image, False otherwise.
        """
        markdown_str: str = ImageTransformer._get_current_line(context)
        # cheap rejection first, also safe when the position is at end of input
        if not markdown_str.startswith("!"):
            return False
        return ImageTransformer._IMAGE_PATTERN.match(markdown_str) is not None

    def _transform(self, context: MarkdownTransformerState) -> None:
        """
        This function downloads the image at the current position, scales it
        to the space offered by the parent layout element, adds it to that
        element and moves the position to the start of the next line.
        """

        # get markdown string of current char -> next line
        markdown_str: str = ImageTransformer._get_current_line(context)
        assert len(markdown_str) > 0

        # match against regex
        match: typing.Optional[re.Match] = ImageTransformer._IMAGE_PATTERN.match(
            markdown_str
        )
        assert match is not None

        # extract (named group) url
        url: str = match["url"]
        assert len(url) > 0

        # open raw image
        image = PILImage.open(
            requests.get(
                url,
                stream=True,
                timeout=ImageTransformer._DOWNLOAD_TIMEOUT_IN_SECONDS,
            ).raw
        )

        # get width and height
        w: int = image.width
        h: int = image.height

        # determine max available width/height
        margins: typing.Tuple[
            Decimal, Decimal, Decimal, Decimal
        ] = ImageTransformer._get_image_default_margins()
        W: int = ImageTransformer._DEFAULT_MAX_WIDTH
        H: int = ImageTransformer._DEFAULT_MAX_HEIGHT

        # Page
        parent_element = context.get_parent_layout_element()
        if isinstance(parent_element, Page):
            W = int(parent_element.get_page_info().get_width() * Decimal(0.8))
            H = int(parent_element.get_page_info().get_height() * Decimal(0.8))

        # BrowserLayout
        if isinstance(parent_element, BrowserLayout):
            page_info = parent_element.get_page().get_page_info()
            W = (
                int(
                    page_info.get_width()
                    - parent_element._horizontal_margin * Decimal(2)
                )
                - 1
            )
            H = (
                int(
                    page_info.get_height()
                    - parent_element._vertical_margin * Decimal(2)
                )
                - 1
            )

        # TODO: Table

        # margin
        W = W - int(margins[1]) - int(margins[3]) - 1
        H = H - int(margins[0]) - int(margins[2]) - 1

        # rescale (keeps the aspect ratio, the image fills the available box)
        # a very elongated image would otherwise have its short side
        # truncated to 0 by int(), hence the lower bound of 1
        r: float = min(W / w, H / h)
        w = max(1, int(w * r))
        h = max(1, int(h * r))

        # create and add Image
        borb_image: Image = Image(image, width=Decimal(w), height=Decimal(h))
        parent_element.add(borb_image)

        # add remote go to annotation
        # this is deliberately best-effort: a failure here must not prevent
        # the image itself from being rendered. Only Exception is caught, so
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            parent_element.get_page().append_remote_go_to_annotation(
                borb_image.get_bounding_box(), url
            )
        except Exception:
            pass

        # seek
        # when the image is on the last line there is no newline left;
        # find() then returns -1 and "-1 + 1" would rewind to offset 0
        markdown: str = context.get_markdown_string()
        newline_index: int = markdown.find("\n", context.tell())
        if newline_index == -1:
            context.seek(len(markdown))
        else:
            context.seek(newline_index + 1)
2 changes: 1 addition & 1 deletion borb/toolkit/text/regular_expression_text_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(
self._page_nr: int = page_nr
self._glyph_bounding_boxes: typing.List["Rectangle"] = glyph_bounding_boxes
self._re_match: re.Match = re_match
# these fields are kept public to align with the existing python re.match object
# these fields are kept public to align with the existing python re.Match object
self.pos = self._re_match.pos
self.endpos = self._re_match.endpos
self.lastindex = self._re_match.lastindex
Expand Down
Loading

0 comments on commit 17e1208

Please sign in to comment.