Skip to content

Commit

Permalink
Merge pull request #87 from pmaupin/future_compatibility
Browse files Browse the repository at this point in the history
Simplify pdfwriter in preparation for major changes
  • Loading branch information
pmaupin authored Apr 8, 2017
2 parents 09327d6 + b08ef24 commit 6753976
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 12 deletions.
3 changes: 1 addition & 2 deletions pdfrw/findobjs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
'''

from .objects import PdfDict, PdfArray, PdfName
from .pdfwriter import user_fmt


def find_objects(source, valid_types=(PdfName.XObject, None),
Expand Down Expand Up @@ -81,7 +80,7 @@ def wrap_object(obj, width, margin):
iw, ih = float(obj.Width), float(obj.Height)
ch = 1.0 * cw / iw * ih
height = ch + margin[1] + margin[3]
p = tuple(user_fmt(x) for x in (cw, ch, xoffset, yoffset))
p = tuple(('%.9f' % x).rstrip('0').rstrip('.') for x in (cw, ch, xoffset, yoffset))
contents.stream = fmt % p
resources = PdfDict(XObject=PdfDict(MyImage=obj))
mbox = PdfArray((0, 0, width, height))
Expand Down
1 change: 1 addition & 0 deletions pdfrw/objects/pdfname.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class BasePdfName(str):
'''

indirect = False
encoded = None

whitespace = '\x00 \t\f\r\n'
delimiters = '()<>{}[]/%'
Expand Down
8 changes: 4 additions & 4 deletions pdfrw/pdfwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,11 @@ def format_obj(obj):
elif isinstance(obj, PdfDict):
if compress and obj.stream:
do_compress([obj])
pairs = sorted((x, y, getattr(x, 'encoded', x))
pairs = sorted((getattr(x, 'encoded', None) or x, y)
for (x, y) in obj.iteritems())
myarray = []
for key, value, encoding in pairs:
myarray.append(encoding)
for key, value in pairs:
myarray.append(key)
myarray.append(add(value))
result = format_array(myarray, '<<%s>>')
stream = obj.stream
Expand All @@ -155,7 +155,7 @@ def format_obj(obj):
# We assume that an object with an indirect
# attribute knows how to represent itself to us.
if hasattr(obj, 'indirect'):
return str(getattr(obj, 'encoded', obj))
return str(getattr(obj, 'encoded', None) or obj)
return user_fmt(obj)

def format_deferred():
Expand Down
2 changes: 1 addition & 1 deletion pdfrw/toreportlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def _makearray(rldoc, pdfobj):
def _makestr(rldoc, pdfobj):
assert isinstance(pdfobj, (float, int, str)), repr(pdfobj)
# TODO: Add fix for float like in pdfwriter
return str(getattr(pdfobj, 'encoded', pdfobj))
return str(getattr(pdfobj, 'encoded', None) or pdfobj)


def makerl_recurse(rldoc, pdfobj):
Expand Down
10 changes: 5 additions & 5 deletions tests/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ examples/subset_b1c400de699af29ea3f1983bb26870ab_1-3_5 880a9578197130273ccb
examples/unspread_d711b74110eefb4e9e6bf1a5bea16bfe 780a9abe26a9de0b5b95ee22c4835e4b

examples/cat_b1c400de699af29ea3f1983bb26870ab_06c86654f9a77e82f9adaa0086fc391c 62bb9b746ff5932d3f1b88942d36a81d
examples/rotate_707e3e2d17cbe9ec2273414b3b63f333_270_1-4_7-8_10-50_52-56 841c980dfadf2cc47ad86e4649ca69b6
examples/rotate_707e3e2d17cbe9ec2273414b3b63f333_270_1-4_7-8_10-50_52-56 7633ba56641115050ba098ecbef8d331
examples/watermark_b1c400de699af29ea3f1983bb26870ab_06c86654f9a77e82f9adaa0086fc391c fe2330d42b3bfc06212415f295752f0e
examples/watermark_b1c400de699af29ea3f1983bb26870ab_06c86654f9a77e82f9adaa0086fc391c_-u e43e3ac0afe1cc242549424755dbf612

Expand Down Expand Up @@ -88,7 +88,7 @@ repaginate/5f265db2736850782aeaba2571a3c749.pdf 2e3046813ce6e40a39bd759a3c8a3c8c
repaginate/6a42c8c79b807bf164d31071749e07b0.pdf bf00d5e44869ae59eb859860d7d5373f
repaginate/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 612cdd84eeac797a1c42fc91756b6d9e
repaginate/7037a992b80b60f0294016037baa9292.pdf dd41b0104f185206b51e7ffe5b07d261
repaginate/707e3e2d17cbe9ec2273414b3b63f333.pdf 6c65526ab372d72cb185933e3d2584ef
repaginate/707e3e2d17cbe9ec2273414b3b63f333.pdf df4d756e2230c333f0c58ad354b5b51c
repaginate/71a751ce2d93a6a5d6ff21735b701fb7.pdf a825f06c934319b93474902fcf300cd2
repaginate/72eb207b8f882618899aa7a65d3cecda.pdf 0b64f19a8a39fadfa2a3eec3f1a01233
repaginate/97ba0a239cefa0dc727c2f1be050ec6c.pdf a94fe7183ce8979174b2ac16dcd9b1ea
Expand Down Expand Up @@ -127,7 +127,7 @@ simple/5f265db2736850782aeaba2571a3c749.pdf d4d2e93ab22e866c86e32da84421f6f9
simple/6a42c8c79b807bf164d31071749e07b0.pdf 221fec351c925a43f5f409fe03d90013
simple/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf fe8dd16dd7fef40338140e0610d0cbbf
simple/7037a992b80b60f0294016037baa9292.pdf 6a2ef24e5f74dd74969ff8cefdfc6a05
simple/707e3e2d17cbe9ec2273414b3b63f333.pdf 4bdf1e57a96ce42717110b4e55098c1a
simple/707e3e2d17cbe9ec2273414b3b63f333.pdf fb6a8eb3cdc2fbef125babe8815f3b70
simple/71a751ce2d93a6a5d6ff21735b701fb7.pdf a825f06c934319b93474902fcf300cd2
simple/72eb207b8f882618899aa7a65d3cecda.pdf 4ce7ff29531cc417c26389af28dc1c5e
simple/97ba0a239cefa0dc727c2f1be050ec6c.pdf c24873bab85b8ecc7c5433d8d802bceb
Expand Down Expand Up @@ -167,7 +167,7 @@ decompress/5f265db2736850782aeaba2571a3c749.pdf 836abcf6e6e1d39ad96481eb20e9b149
decompress/6a42c8c79b807bf164d31071749e07b0.pdf 221fec351c925a43f5f409fe03d90013
decompress/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 226773cac79e1a5fed1379a0501a5df0
decompress/7037a992b80b60f0294016037baa9292.pdf c9a3602b26d82ae145d9f5822125a158
decompress/707e3e2d17cbe9ec2273414b3b63f333.pdf f6d960e75480aa4f729059388dcedd71
decompress/707e3e2d17cbe9ec2273414b3b63f333.pdf 3250a56e14a9855eccd67bb347808d24
decompress/71a751ce2d93a6a5d6ff21735b701fb7.pdf a825f06c934319b93474902fcf300cd2
decompress/72eb207b8f882618899aa7a65d3cecda.pdf a4366874fb6db1d9a0c998361ea32b8d
decompress/97ba0a239cefa0dc727c2f1be050ec6c.pdf c24873bab85b8ecc7c5433d8d802bceb
Expand Down Expand Up @@ -208,7 +208,7 @@ compress/5f265db2736850782aeaba2571a3c749.pdf bb4898beac50171de7502f13925af80c
compress/6a42c8c79b807bf164d31071749e07b0.pdf 221fec351c925a43f5f409fe03d90013
compress/6f3a4de5c68ba3b5093e9b54b7c4e9f4.pdf 1c3fbae41e7cad7deca13fab93514bc7
compress/7037a992b80b60f0294016037baa9292.pdf 9182a9765544e4a91404db65a6f951d7
compress/707e3e2d17cbe9ec2273414b3b63f333.pdf bde552c97872c5a4eeafab3b8b38f703
compress/707e3e2d17cbe9ec2273414b3b63f333.pdf 0e75dda73bf18d9968499277ab1a367e
compress/71a751ce2d93a6a5d6ff21735b701fb7.pdf faa7eb31789a3789f65de30a4e58e594
compress/72eb207b8f882618899aa7a65d3cecda.pdf 0155549fc04357220cc6be541dda7bc1
compress/97ba0a239cefa0dc727c2f1be050ec6c.pdf 067bfee3b2bd9c250e7c4157ff543a81
Expand Down
84 changes: 84 additions & 0 deletions tests/update_expected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#! /usr/bin/env python2
"""
Put old (good) results in ramdisk/reference,
then generate new (unknown) test results in ramdisk/tmp_results,
THEN SWITCH BACK TO KNOWN GOOD SYSTEM, and finally:
run this to update any checksums in expected.txt where both versions
parse to same PDFs.
"""

import os
import hashlib
from pdfrw import PdfReader, PdfWriter, PdfArray, PdfDict, PdfObject


def make_canonical(trailer):
    ''' Mark every object reachable from the trailer as indirect.

        Performs an iterative walk starting from the trailer's
        values, descending into PdfArray items and PdfDict values.
        Objects are tracked by id() so that shared or cyclic
        references are only processed once.

        Assumes everything is a Pdf object already (each visited
        object must accept an ``indirect`` attribute).

        Returns the same trailer object that was passed in.
    '''
    seen_ids = set()
    pending = list(trailer.values())
    while pending:
        current = pending.pop()
        marker = id(current)
        if marker not in seen_ids:
            seen_ids.add(marker)
            current.indirect = True
            # Only containers contribute further work.
            if isinstance(current, PdfArray):
                pending.extend(current)
            elif isinstance(current, PdfDict):
                pending.extend(current.values())
    return trailer

# Load the whole checksum table, read in binary mode, into the module-level
# name `expected`.  Later code looks digests up in it by substring match and
# substitutes them before writing the file back out.
with open('expected.txt', 'rb') as f:
    expected = f.read()

def get_digest(fname):
    ''' Return the hexadecimal MD5 digest of the contents of
        the file named fname, or None if the file is empty.
    '''
    with open(fname, 'rb') as stream:
        payload = stream.read()
    if not payload:
        return None
    return hashlib.md5(payload).hexdigest()

# Scratch file that each canonicalized PDF is written to before hashing.
tmp = '_temp.pdf'

# Tree of known-good outputs, and the parallel tree of outputs under test.
reference_root = os.path.join('ramdisk', 'reference')
results_root = os.path.join('ramdisk', 'tmp_results')

count = 0       # reference PDFs whose checksum is listed in expected.txt
goodcount = 0   # of those, how many canonicalize identically in both trees

# (old_digest, new_digest) pairs to substitute into expected.txt.
changes = []
try:
    for (srcpath, _, filenames) in os.walk(reference_root):
        for name in filenames:
            if not name.endswith('.pdf'):
                continue
            src = os.path.join(srcpath, name)
            # Map the reference path onto the results tree by its path
            # relative to the reference root.  A textual replace of
            # '/reference/' depends on the separator and, when nothing
            # matches, silently compares the reference file with itself.
            dst = os.path.join(results_root,
                               os.path.relpath(src, reference_root))
            if not os.path.exists(dst):
                continue
            src_digest = get_digest(src)
            # Only files whose current checksum is recorded are candidates.
            if not src_digest or src_digest not in expected:
                continue
            # Parenthesized so the output is the same whether print is a
            # statement or a function.
            print(src)
            count += 1
            # An empty new result can never match; skip it the same way an
            # empty reference is skipped instead of failing later.
            dst_digest = get_digest(dst)
            if not dst_digest:
                continue
            # Canonicalize and rewrite the reference, then hash the rewrite.
            trailer = make_canonical(PdfReader(src))
            out = PdfWriter(tmp)
            out.write(trailer=trailer)
            match_digest = get_digest(tmp)
            if not match_digest:
                continue
            # Do the same for the new result; the two rewrites must be
            # byte-identical for the new checksum to be accepted.
            trailer = make_canonical(PdfReader(dst))
            out = PdfWriter(tmp)
            out.write(trailer=trailer)
            if get_digest(tmp) != match_digest:
                continue
            goodcount += 1
            print("OK")
            changes.append((src_digest, dst_digest))
finally:
    # Do not leave the scratch PDF behind, even if a file failed to parse.
    if os.path.exists(tmp):
        os.remove(tmp)

print('%d %d' % (count, goodcount))

# Swap each accepted old checksum for its new value in the table text.
for stuff in changes:
    expected = expected.replace(*stuff)

with open('expected.txt', 'wb') as f:
    f.write(expected)

0 comments on commit 6753976

Please sign in to comment.