From d5ad4cc9b30ba8e6faf7a8633de54283f220a826 Mon Sep 17 00:00:00 2001 From: arch-t15 Date: Thu, 24 Mar 2022 11:08:19 +0200 Subject: [PATCH 1/4] use nullhandler --- pdfrw/errors.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pdfrw/errors.py b/pdfrw/errors.py index ef6ab7d..d88e1f3 100644 --- a/pdfrw/errors.py +++ b/pdfrw/errors.py @@ -8,15 +8,8 @@ import logging - -fmt = logging.Formatter('[%(levelname)s] %(filename)s:%(lineno)d %(message)s') - -handler = logging.StreamHandler() -handler.setFormatter(fmt) - log = logging.getLogger('pdfrw') -log.setLevel(logging.WARNING) -log.addHandler(handler) +log.addHandler(logging.NullHandler()) class PdfError(Exception): From 6d270b72d26939233e12edfab9dd968ae5b3479d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anti=20R=C3=A4is?= Date: Wed, 16 Aug 2023 13:53:25 +0300 Subject: [PATCH 2/4] 2to3 convert --- examples/rl2/decodegraphics.py | 20 ++++++++++---------- examples/subset_booklets.py | 8 ++++---- pdfrw/crypt.py | 2 +- pdfrw/findobjs.py | 2 +- pdfrw/objects/pdfdict.py | 10 +++++----- pdfrw/objects/pdfstring.py | 12 ++++++------ pdfrw/pagemerge.py | 4 ++-- pdfrw/pdfreader.py | 26 +++++++++++++------------- pdfrw/pdfwriter.py | 8 ++++---- pdfrw/py23_diffs.py | 2 +- pdfrw/toreportlab.py | 4 ++-- pdfrw/uncompress.py | 12 ++++++------ tests/checkdiffs.py | 8 ++++---- tests/myprofile.py | 2 +- tests/test_examples.py | 2 +- tests/test_flate_png.py | 4 ++-- tests/test_pdfstring.py | 20 ++++++++++---------- tests/test_roundtrip.py | 2 +- tests/update_expected.py | 8 ++++---- 19 files changed, 78 insertions(+), 78 deletions(-) diff --git a/examples/rl2/decodegraphics.py b/examples/rl2/decodegraphics.py index d26daf7..d51b374 100644 --- a/examples/rl2/decodegraphics.py +++ b/examples/rl2/decodegraphics.py @@ -359,7 +359,7 @@ def fixlist(params): dispatch = {} expected_args = 'self token params'.split() - for key, func in globals().items(): + for key, func in list(globals().items()): if key.startswith('parse_'): args, varargs, keywords, defaults = getargspec(func) assert (args == expected_args and varargs is None and @@ -390,7 +390,7 @@ def parsepage(cls, page, canvas=None): self.gpath = None self.tpath = None self.fontdict = dict((x, FontInfo(y)) for - (x, y) in page.Resources.Font.items()) + (x, y) in list(page.Resources.Font.items())) for token in self.tokens: info = dispatch(token) @@ -404,14 +404,14 @@ def parsepage(cls, page, canvas=None): delta = len(params) - len(paraminfo) if delta: if delta < 0: - print ('Operator %s expected %s parameters, got %s' % - (token, len(paraminfo), params)) + print(('Operator %s expected %s parameters, got %s' % + (token, len(paraminfo), params))) params[:] = [] continue else: - print ("Unparsed parameters/commands: %s" % params[:delta]) + print(("Unparsed parameters/commands: %s" % params[:delta])) del params[:delta] - paraminfo = zip(paraminfo, params) + paraminfo = list(zip(paraminfo, params)) try: params[:] = [x(y) for (x, y) in paraminfo] except: @@ -431,13 +431,13 @@ def getvalue(oldval): name = oldval[0].__name__ def myfunc(self, token, params): - print ('%s called %s(%s)' % (token, name, - ', '.join(str(x) for x in params))) + print(('%s called %s(%s)' % (token, name, + ', '.join(str(x) for x in params)))) if name in undisturbed: myfunc = oldval[0] return myfunc, oldval[1] return dict((x, getvalue(y)) - for (x, y) in _ParseClass.dispatch.items()) + for (x, y) in list(_ParseClass.dispatch.items())) class _DebugParse(_ParseClass): dispatch = debugdispatch() @@ -453,5 +453,5 @@ class _DebugParse(_ParseClass): fname, = sys.argv[1:] pdf = PdfReader(fname, decompress=True) for i, page in enumerate(pdf.pages): - print ('\nPage %s ------------------------------------' % i) + print(('\nPage %s ------------------------------------' % i)) parse(page) diff --git a/examples/subset_booklets.py b/examples/subset_booklets.py index db0b9af..9717f8e 100755 --- a/examples/subset_booklets.py +++ b/examples/subset_booklets.py @@ -33,15 +33,15 @@ def fixpage(*pages): INPFN, = sys.argv[1:] OUTFN = 'booklet.' + os.path.basename(INPFN) ALL_IPAGES = PdfReader(INPFN).pages -print 'The pdf file '+str(INPFN)+' has '+str(len(ALL_IPAGES))+' pages.' +print('The pdf file '+str(INPFN)+' has '+str(len(ALL_IPAGES))+' pages.') #Make sure we have an even number if len(ALL_IPAGES) & 1: ALL_IPAGES.append(None) - print 'Inserting one more blank page to make pages number even.' + print('Inserting one more blank page to make pages number even.') NUM_OF_ITER, ITERS_LEFT = divmod(len(ALL_IPAGES), BOOKLET_SIZE) -print 'Making '+str(NUM_OF_ITER)+' subbooklets of '+str(BOOKLET_SIZE)+' pages each.' +print('Making '+str(NUM_OF_ITER)+' subbooklets of '+str(BOOKLET_SIZE)+' pages each.') opages = [] for iteration in range(0, NUM_OF_ITER): ipages = ALL_IPAGES[iteration*BOOKLET_SIZE:(iteration+1)*BOOKLET_SIZE] @@ -58,4 +58,4 @@ def fixpage(*pages): opages.append(fixpage(ipages.pop(), ipages.pop(0))) PdfWriter(OUTFN).addpages(opages).write() -print 'It took '+ str(round(time.time()-START, 2))+' seconds to make the pdf subbooklets changes.' +print('It took '+ str(round(time.time()-START, 2))+' seconds to make the pdf subbooklets changes.') diff --git a/pdfrw/crypt.py b/pdfrw/crypt.py index dc00676..e62a006 100644 --- a/pdfrw/crypt.py +++ b/pdfrw/crypt.py @@ -2,7 +2,7 @@ # Copyright (C) 2017 Jon Lund Steffensen # MIT license -- See LICENSE.txt for details -from __future__ import division + import hashlib import struct diff --git a/pdfrw/findobjs.py b/pdfrw/findobjs.py index 67d33a0..f89fbe2 100644 --- a/pdfrw/findobjs.py +++ b/pdfrw/findobjs.py @@ -49,7 +49,7 @@ def find_objects(source, valid_types=(PdfName.XObject, None), if isinstance(obj, PdfDict): if obj.Type in valid_types and obj.Subtype in valid_subtypes: yield obj - obj = [y for (x, y) in sorted(obj.iteritems()) + obj = [y for (x, y) in sorted(obj.items()) if x not in no_follow] else: # TODO: This forces resolution of any indirect objects in diff --git a/pdfrw/objects/pdfdict.py b/pdfrw/objects/pdfdict.py index 888fc83..67df42a 100644 --- a/pdfrw/objects/pdfdict.py +++ b/pdfrw/objects/pdfdict.py @@ -180,20 +180,20 @@ def iteritems(self, dictiter=iteritems, yield key, value def items(self): - return list(self.iteritems()) + return list(self.items()) def itervalues(self): - for key, value in self.iteritems(): + for key, value in self.items(): yield value def values(self): - return list((value for key, value in self.iteritems())) + return list((value for key, value in self.items())) def keys(self): - return list((key for key, value in self.iteritems())) + return list((key for key, value in self.items())) def __iter__(self): - for key, value in self.iteritems(): + for key, value in self.items(): yield key def iterkeys(self): diff --git a/pdfrw/objects/pdfstring.py b/pdfrw/objects/pdfstring.py index 906f30e..4415237 100644 --- a/pdfrw/objects/pdfstring.py +++ b/pdfrw/objects/pdfstring.py @@ -291,13 +291,13 @@ def find_pdfdocencoding(encoding): decoding_map = dict((x, x) for x in decoding_map) # Add in the special Unicode characters - decoding_map.update(zip(range(0x18, 0x20), ( - 0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC))) - decoding_map.update(zip(range(0x80, 0x9F), ( + decoding_map.update(list(zip(list(range(0x18, 0x20)), ( + 0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC)))) + decoding_map.update(list(zip(list(range(0x80, 0x9F)), ( 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018, 0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160, - 0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E))) + 0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E)))) decoding_map[0xA0] = 0x20AC # Make the encoding map from the decoding map @@ -350,7 +350,7 @@ def init_unescapes(cls): cls.unescape_func = unescape_func unescape_dict = dict(((chr(x), chr(x)) for x in range(0x100))) - unescape_dict.update(zip('nrtbf', '\n\r\t\b\f')) + unescape_dict.update(list(zip('nrtbf', '\n\r\t\b\f'))) unescape_dict['\r'] = '' unescape_dict['\n'] = '' unescape_dict['\r\n'] = '' @@ -543,7 +543,7 @@ def from_unicode(cls, source, text_encoding='auto', return cls.from_bytes(raw, encoding) @classmethod - def encode(cls, source, uni_type = type(u''), isinstance=isinstance): + def encode(cls, source, uni_type = type(''), isinstance=isinstance): """ The encode() constructor is a legacy function that is also a convenience for the PdfWriter. """ diff --git a/pdfrw/pagemerge.py b/pdfrw/pagemerge.py index 4555110..81782df 100644 --- a/pdfrw/pagemerge.py +++ b/pdfrw/pagemerge.py @@ -196,7 +196,7 @@ def do_xobjs(xobj_list, restore_first=False): if xobjs is None: xobjs = resources.XObject = PdfDict() else: - allkeys = xobjs.keys() + allkeys = list(xobjs.keys()) if allkeys: keys = (x for x in allkeys if x.startswith('/pdfrw_')) keys = (x for x in keys if x[7:].isdigit()) @@ -246,5 +246,5 @@ def xobj_box(self): ''' Return the smallest box that encloses every object in the list. ''' - a, b, c, d = zip(*(xobj.box for xobj in self)) + a, b, c, d = list(zip(*(xobj.box for xobj in self))) return PdfArray((min(a), min(b), max(c), max(d))) diff --git a/pdfrw/pdfreader.py b/pdfrw/pdfreader.py index c2ae030..bdfe877 100644 --- a/pdfrw/pdfreader.py +++ b/pdfrw/pdfreader.py @@ -63,7 +63,7 @@ def readdict(self, source, PdfDict=PdfDict): ''' specialget = self.special.get result = PdfDict() - next = source.next + next = source.__next__ tok = next() while tok != '>>': @@ -198,7 +198,7 @@ def loadindirect(self, key, PdfDict=PdfDict, ok = ok and objid[2] == 'obj' if not ok: source.floc = offset - source.next() + next(source) objheader = '%d %d obj' % (objnum, gennum) fdata = source.fdata offset2 = (fdata.find('\n' + objheader) + 1 or @@ -214,7 +214,7 @@ def loadindirect(self, key, PdfDict=PdfDict, # Read the object, and call special code if it starts # an array or dictionary - obj = source.next() + obj = next(source) func = self.special.get(obj) if func is not None: obj = func(source) @@ -225,7 +225,7 @@ def loadindirect(self, key, PdfDict=PdfDict, # Mark the object as indirect, and # just return it if it is a simple object. obj.indirect = key - tok = source.next() + tok = next(source) if tok == 'endobj': return obj @@ -271,13 +271,13 @@ def decrypt_all(self): if self.crypt_filters is not None: crypt.decrypt_objects( - self.indirect_objects.values(), self.stream_crypt_filter, + list(self.indirect_objects.values()), self.stream_crypt_filter, self.crypt_filters) def uncompress(self): self.read_all() - uncompress(self.indirect_objects.values()) + uncompress(list(self.indirect_objects.values())) def load_stream_objects(self, object_streams): # read object streams @@ -299,7 +299,7 @@ def load_stream_objects(self, object_streams): for obj in objs: objsource = PdfTokens(obj.stream, 0, False) - next = objsource.next + next = objsource.__next__ offsets = [] firstoffset = int(obj.First) while objsource.floc < firstoffset: @@ -329,7 +329,7 @@ def findxref(self, fdata): if startloc < 0: raise PdfParseError('Did not find "startxref" at end of file') source = PdfTokens(fdata, startloc, False, self.verbose) - tok = source.next() + tok = next(source) assert tok == 'startxref' # (We just checked this...) tableloc = source.next_default() if not tableloc.isdigit(): @@ -353,7 +353,7 @@ def readint(s, lengths): offset = next setdefault = source.obj_offsets.setdefault - next = source.next + next = source.__next__ # check for xref stream object objid = source.multiple(3) ok = len(objid) == 3 @@ -376,7 +376,7 @@ def readint(s, lengths): stream = stream if stream is not old_strm else convert_store(old_strm) num_pairs = obj.Index or PdfArray(['0', obj.Size]) num_pairs = [int(x) for x in num_pairs] - num_pairs = zip(num_pairs[0::2], num_pairs[1::2]) + num_pairs = list(zip(num_pairs[0::2], num_pairs[1::2])) entry_sizes = [int(x) for x in obj.W] if len(entry_sizes) != 3: source.exception('Invalid entry size') @@ -399,7 +399,7 @@ def parse_xref_table(self, source, int=int, range=range): ''' Parse (one of) the cross-reference file section(s) ''' setdefault = source.obj_offsets.setdefault - next = source.next + next = source.__next__ # plain xref table start = source.floc try: @@ -448,7 +448,7 @@ def parse_xref_table(self, source, int=int, range=range): def parsexref(self, source): ''' Parse (one of) the cross-reference file section(s) ''' - next = source.next + next = source.__next__ try: tok = next() except StopIteration: @@ -619,7 +619,7 @@ def __init__(self, fname=None, fdata=None, decompress=False, trailer, is_stream = self.parsexref(source) prev = trailer.Prev if prev is None: - token = source.next() + token = next(source) if token != 'startxref' and not xref_list: source.warning('Expected "startxref" ' 'at end of xref table') diff --git a/pdfrw/pdfwriter.py b/pdfrw/pdfwriter.py index 3c887ba..05f11bf 100755 --- a/pdfrw/pdfwriter.py +++ b/pdfrw/pdfwriter.py @@ -29,12 +29,12 @@ def user_fmt(obj, isinstance=isinstance, float=float, str=str, - basestring=(type(u''), type(b'')), encode=PdfString.encode): + str=(type(''), type(b'')), encode=PdfString.encode): ''' This function may be replaced by the user for specialized formatting requirements. ''' - if isinstance(obj, basestring): + if isinstance(obj, str): return encode(obj) # PDFs don't handle exponent notation @@ -138,7 +138,7 @@ def format_obj(obj): if compress and obj.stream: do_compress([obj]) pairs = sorted((getattr(x, 'encoded', None) or x, y) - for (x, y) in obj.iteritems()) + for (x, y) in obj.items()) myarray = [] for key, value in pairs: myarray.append(key) @@ -380,6 +380,6 @@ def make_canonical(self): if isinstance(obj, PdfArray): workitems += obj else: - workitems += obj.values() + workitems += list(obj.values()) replaceable = set(vars()) \ No newline at end of file diff --git a/pdfrw/py23_diffs.py b/pdfrw/py23_diffs.py index b3509d0..995ea47 100644 --- a/pdfrw/py23_diffs.py +++ b/pdfrw/py23_diffs.py @@ -10,7 +10,7 @@ zlib = None try: - unicode = unicode + str = str except NameError: def convert_load(s): diff --git a/pdfrw/toreportlab.py b/pdfrw/toreportlab.py index 3434fbf..2dfb0ee 100644 --- a/pdfrw/toreportlab.py +++ b/pdfrw/toreportlab.py @@ -67,7 +67,7 @@ def _makedict(rldoc, pdfobj): rlobj = rldoc.Reference(rlobj) pdfobj.derived_rl_obj[rldoc] = rlobj, None - for key, value in pdfobj.iteritems(): + for key, value in pdfobj.items(): rldict[key[1:]] = makerl_recurse(rldoc, value) return rlobj @@ -85,7 +85,7 @@ def _makestream(rldoc, pdfobj, xobjtype=PdfName.XObject): result = rldoc.Reference(rlobj, fullname) pdfobj.derived_rl_obj[rldoc] = result, shortname - for key, value in pdfobj.iteritems(): + for key, value in pdfobj.items(): rldict[key[1:]] = makerl_recurse(rldoc, value) return result diff --git a/pdfrw/uncompress.py b/pdfrw/uncompress.py index 1921817..9dc4980 100644 --- a/pdfrw/uncompress.py +++ b/pdfrw/uncompress.py @@ -94,21 +94,21 @@ def flate_png_impl(data, predictor=1, columns=1, colors=1, bpc=8): def subfilter(data, prior_row_data, start, length, pixel_size): # filter type 1: Sub # Recon(x) = Filt(x) + Recon(a) - for i in xrange(pixel_size, length): + for i in range(pixel_size, length): left = data[start + i - pixel_size] data[start + i] = (data[start + i] + left) % 256 def upfilter(data, prior_row_data, start, length, pixel_size): # filter type 2: Up # Recon(x) = Filt(x) + Recon(b) - for i in xrange(length): + for i in range(length): up = prior_row_data[i] data[start + i] = (data[start + i] + up) % 256 def avgfilter(data, prior_row_data, start, length, pixel_size): # filter type 3: Avg # Recon(x) = Filt(x) + floor((Recon(a) + Recon(b)) / 2) - for i in xrange(length): + for i in range(length): left = data[start + i - pixel_size] if i >= pixel_size else 0 up = prior_row_data[i] floor = math.floor((left + up) / 2) @@ -128,7 +128,7 @@ def paeth_predictor(a, b, c): return b else: return c - for i in xrange(length): + for i in range(length): left = data[start + i - pixel_size] if i >= pixel_size else 0 up = prior_row_data[i] up_left = prior_row_data[i - pixel_size] if i >= pixel_size else 0 @@ -143,8 +143,8 @@ def paeth_predictor(a, b, c): data.extend([0] * padding) assert len(data) % rowlen == 0 - rows = xrange(0, len(data), rowlen) - prior_row_data = [ 0 for i in xrange(columnbytes) ] + rows = range(0, len(data), rowlen) + prior_row_data = [ 0 for i in range(columnbytes) ] for row_index in rows: filter_type = data[row_index] diff --git a/tests/checkdiffs.py b/tests/checkdiffs.py index 4d11888..9dc1113 100755 --- a/tests/checkdiffs.py +++ b/tests/checkdiffs.py @@ -5,7 +5,7 @@ import subprocess import hashlib -import expected +from . import expected import static_pdfs source_pdfs = static_pdfs.pdffiles[0] @@ -22,7 +22,7 @@ srcf = source_pdfs.get(pdffile) dstf = os.path.join(dstd, pdffile) if pdffile not in source_pdfs: - print('\n Skipping %s -- source not found' % testname) + print(('\n Skipping %s -- source not found' % testname)) continue with open(dstf, 'rb') as f: @@ -30,7 +30,7 @@ hash = hashlib.md5(data).hexdigest() skipset = set((hash, 'skip', 'xfail', 'fail', '!' + hash)) if expected.results[testname] & skipset: - print('\n Skipping %s -- marked done' % testname) + print(('\n Skipping %s -- marked done' % testname)) continue if os.path.exists('foobar.pdf'): os.remove('foobar.pdf') @@ -50,7 +50,7 @@ n = next pdf without marking q = quit --> ''' % testname) - sel = raw_input() + sel = input() if sel == 'q': raise SystemExit(0) if sel == 'n': diff --git a/tests/myprofile.py b/tests/myprofile.py index af18a64..2a62396 100644 --- a/tests/myprofile.py +++ b/tests/myprofile.py @@ -1,5 +1,5 @@ import cProfile import unittest -import test_roundtrip +from . import test_roundtrip cProfile.run('unittest.main(test_roundtrip)') diff --git a/tests/test_examples.py b/tests/test_examples.py index 6871b80..8ddf481 100755 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -32,7 +32,7 @@ import hashlib import subprocess import static_pdfs -import expected +from . import expected from pdfrw.py23_diffs import convert_store from pdfrw import PdfReader, PdfWriter diff --git a/tests/test_flate_png.py b/tests/test_flate_png.py index 83c4236..66a7442 100755 --- a/tests/test_flate_png.py +++ b/tests/test_flate_png.py @@ -33,9 +33,9 @@ def filepath(filename): def create_data(nc=1, nr=1, bpc=8, ncolors=1, filter_type=0): pixel_size = (bpc * ncolors + 7) // 8 data = [] - for r in xrange(nr): + for r in range(nr): data.append(filter_type if r > 0 else 0) # filter byte - for c in xrange(nc * pixel_size): + for c in range(nc * pixel_size): data.append(r * nc * pixel_size + c * pixel_size) data = array.array('B', data) logging.debug("Data: %r" % (data)) diff --git a/tests/test_pdfstring.py b/tests/test_pdfstring.py index 0ea91ad..ddeee92 100755 --- a/tests/test_pdfstring.py +++ b/tests/test_pdfstring.py @@ -21,7 +21,7 @@ class TestBaseEncoding(unittest.TestCase): def encode(self, value): x = PdfString.encode(value) - if isinstance(value, type(u'')): + if isinstance(value, type('')): y = PdfString.from_unicode(value) else: y = PdfString.from_bytes(value) @@ -53,13 +53,13 @@ def test_doubleslash(self): def test_unicode_encoding(self): # These chars are in PdfDocEncoding - self.assertEqual(self.roundtrip(u'PDF™©®')[0], '(') + self.assertEqual(self.roundtrip('PDF™©®')[0], '(') # These chars are not in PdfDocEncoding - self.assertEqual(self.roundtrip(u'δΩσ')[0], '<') + self.assertEqual(self.roundtrip('δΩσ')[0], '<') # Check that we're doing a reasonable encoding # Might want to change this later if we change the definition of reasonable - self.roundtrip(u'(\n\u00FF', '(\\(\n\xff)') - self.roundtrip(u'(\n\u0101', '') + self.roundtrip('(\n\u00FF', '(\\(\n\xff)') + self.roundtrip('(\n\u0101', '') def test_constructor(self): @@ -98,12 +98,12 @@ def test_unescaping(self): self.decode_bytes(r'(\ )', ' ') def test_BOM_variants(self): - self.roundtrip(u'\ufeff', '') - self.roundtrip(u'\ufffe', '') - self.roundtrip(u'\xfe\xff', '') - self.roundtrip(u'\xff\xfe', '(\xff\xfe)') + self.roundtrip('\ufeff', '') + self.roundtrip('\ufffe', '') + self.roundtrip('\xfe\xff', '') + self.roundtrip('\xff\xfe', '(\xff\xfe)') self.assertRaises(UnicodeError, PdfString.from_unicode, - u'þÿ blah', text_encoding='pdfdocencoding') + 'þÿ blah', text_encoding='pdfdocencoding') def test_byte_encode(self): self.assertEqual(self.encode(b'ABC'), '(ABC)') diff --git a/tests/test_roundtrip.py b/tests/test_roundtrip.py index 2e097b6..729815b 100755 --- a/tests/test_roundtrip.py +++ b/tests/test_roundtrip.py @@ -31,7 +31,7 @@ import hashlib import pdfrw import static_pdfs -import expected +from . import expected from pdfrw.py23_diffs import convert_store diff --git a/tests/update_expected.py b/tests/update_expected.py index bed5331..d962c0d 100755 --- a/tests/update_expected.py +++ b/tests/update_expected.py @@ -30,7 +30,7 @@ def make_canonical(trailer): if isinstance(obj, PdfArray): workitems += obj else: - workitems += obj.values() + workitems += list(obj.values()) return trailer with open('expected.txt', 'rb') as f: @@ -58,7 +58,7 @@ def get_digest(fname): src_digest = get_digest(src) if not src_digest or src_digest not in expected: continue - print src + print(src) count += 1 trailer = make_canonical(PdfReader(src)) out = PdfWriter(tmp) @@ -72,10 +72,10 @@ def get_digest(fname): if get_digest(tmp) != match_digest: continue goodcount += 1 - print "OK" + print("OK") changes.append((src_digest, get_digest(dst))) -print count, goodcount +print(count, goodcount) for stuff in changes: expected = expected.replace(*stuff) From 279f8a160e1b3e32a4572b8d74faab4450aad1a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anti=20R=C3=A4is?= Date: Wed, 16 Aug 2023 13:58:39 +0300 Subject: [PATCH 3/4] fix variable name --- pdfrw/pdfwriter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pdfrw/pdfwriter.py b/pdfrw/pdfwriter.py index 05f11bf..beccde7 100755 --- a/pdfrw/pdfwriter.py +++ b/pdfrw/pdfwriter.py @@ -29,12 +29,12 @@ def user_fmt(obj, isinstance=isinstance, float=float, str=str, - str=(type(''), type(b'')), encode=PdfString.encode): + basestring=(type(u''), type(b'')), encode=PdfString.encode): ''' This function may be replaced by the user for specialized formatting requirements. ''' - if isinstance(obj, str): + if isinstance(obj, basestring): return encode(obj) # PDFs don't handle exponent notation @@ -382,4 +382,4 @@ def make_canonical(self): else: workitems += list(obj.values()) - replaceable = set(vars()) \ No newline at end of file + replaceable = set(vars()) From f5e5d44f145877e27748ab41d0b49cd68274d37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anti=20R=C3=A4is?= Date: Mon, 29 Apr 2024 12:00:51 +0300 Subject: [PATCH 4/4] fix docstring escape errors --- pdfrw/objects/pdfstring.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pdfrw/objects/pdfstring.py b/pdfrw/objects/pdfstring.py index 4415237..072e0f5 100644 --- a/pdfrw/objects/pdfstring.py +++ b/pdfrw/objects/pdfstring.py @@ -52,7 +52,7 @@ A literal string may encode bytes almost unmolested. The caveat is that if a byte has the same value as a parenthesis, it must be escaped so that the tokenizer knows the string is not finished. This is accomplished -by using the ASCII backslash ("\") as an escape character. Of course, +by using the ASCII backslash ("\\") as an escape character. Of course, now any backslash appearing in the data must likewise be escaped. Hexadecimal strings @@ -117,7 +117,7 @@ in literal strings is to not escape parentheses. This only works, and is only allowed, when the parentheses are properly balanced. For example, "((Hello))" is a valid encoding for a literal string, but "((Hello)" is not; -the latter case should be encoded "(\(Hello)" +the latter case should be encoded "(\\(Hello)" Encoding text into strings ========================== @@ -372,12 +372,12 @@ def decode_literal(self): Possible string escapes from the spec: (PDF 1.7 Reference, section 3.2.3, page 53) - 1. \[nrtbf\()]: simple escapes + 1. \\[nrtbf\\()]: simple escapes 2. \\d{1,3}: octal. Must be zero-padded to 3 digits if followed by digit - 3. \: line continuation. We don't know the EOL + 3. \\: line continuation. We don't know the EOL marker used in the PDF, so accept \r, \n, and \r\n. - 4. Any other character following \ escape -- the backslash + 4. Any other character following \\ escape -- the backslash is swallowed. """ result = (self.unescape_func or self.init_unescapes())(self[1:-1])