From 2d09ca17df6a21ff23d16ffd0102d9252c5f8984 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Jun 2019 16:44:44 +0200 Subject: [PATCH 01/16] FIX pass the __dict__ item of a class __dict__ --- cloudpickle/cloudpickle.py | 19 ++++++++++++++++--- tests/cloudpickle_test.py | 23 +++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index c92b2eac4..b640721c9 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -642,10 +642,23 @@ def save_dynamic_class(self, obj): for k in obj.__slots__: clsdict.pop(k, None) - # If type overrides __dict__ as a property, include it in the type - # kwargs. In Python 2, we can't set this attribute after construction. + # A class __dict__ is part of the class state. At unpickling time, it + # must be *initialized* (in an empty state) during class creation and + # updated during class re-hydratation. + # However, a class __dict__ is read-only, and does not support direct + # item assignement. Instead, way to update a class __dict__ is to call + # setattr(k, v) on the underlying class, which has the same effect. + # There is one corner case: if the __dict__ class has itself a + # "__dict__" key (this means that the class likely overrides the + # __dict__ property of its instances), setattr("__dict__", v) will try + # to modify the read-only class __dict__ instead, and fail. As a + # result, if it exists, the class __dict__ must contain its __dict__ + # item when it is initialized and fed to the class reconstructor. __dict__ = clsdict.pop('__dict__', None) - if isinstance(__dict__, property): + if __dict__ is not None: + # Native pickle memoization of dict objects prevents us from + # reference cycles even if __dict__ is now saved before obj is + # memoized. type_kwargs['__dict__'] = __dict__ save = self.save diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index ec1d089bb..39518e113 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1867,6 +1867,29 @@ def __getattr__(self, name): with pytest.raises(pickle.PicklingError, match='recursion'): cloudpickle.dumps(a) + def test___dict__attribute_not_dropped_during_pickling(self): + # Test https://github.com/cloudpipe/cloudpickle/issues/282. cloudpickle + # used to drop __dict__ attributes of classes at pickling time. + pickle_filename = os.path.join(self.tmpdir, 'class_with___dict__.pkl') + _dict = {'some_attribute': 1} + class A: + __dict__ = _dict + a = A() + self.assertEqual(a.__dict__, _dict) + + with open(pickle_filename, "wb") as f: + cloudpickle.dump(a, f, protocol=self.protocol) + + # Depickle the class in a new python session to make sure the class is + # fully-recreated, and not looked-up in existing cloudpickle + # class-tracking constructs. + child_process_script = """ + import pickle + with open("{filename}", "rb") as f: + depickled_a = pickle.load(f) + assert depickled_a.__dict__ == {_dict}, depickled_a.__dict__ + """.format(filename=pickle_filename, _dict=_dict) + assert_run_python_script(textwrap.dedent(child_process_script)) class Protocol2CloudPickleTest(CloudPickleTest): From 8ab2f837ecc04412e1a6868507e9f2faea83ec39 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 13 Jun 2019 17:08:18 +0200 Subject: [PATCH 02/16] FIX similar fix in cloudpickle_fast --- cloudpickle/cloudpickle_fast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index dfe554d62..8343bfa65 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -72,7 +72,7 @@ def _class_getnewargs(obj): type_kwargs["__slots__"] = obj.__slots__ __dict__ = obj.__dict__.get('__dict__', None) - if isinstance(__dict__, property): + if __dict__ is not None: type_kwargs['__dict__'] = __dict__ return (type(obj), obj.__name__, obj.__bases__, type_kwargs, @@ -153,7 +153,7 @@ def _class_getstate(obj): for k in obj.__slots__: clsdict.pop(k, None) - clsdict.pop('__dict__', None) # unpicklable property object + clsdict.pop('__dict__', None) # specified in class reconstruction return (clsdict, {}) From cdd0aa759a1a25e8e98dd262d0cb7ab8002621f2 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 11:41:32 +0200 Subject: [PATCH 03/16] CLN typo --- cloudpickle/cloudpickle.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b640721c9..957a0ac36 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -646,8 +646,9 @@ def save_dynamic_class(self, obj): # must be *initialized* (in an empty state) during class creation and # updated during class re-hydratation. # However, a class __dict__ is read-only, and does not support direct - # item assignement. Instead, way to update a class __dict__ is to call - # setattr(k, v) on the underlying class, which has the same effect. + # item assignement. Instead, the way to update a class __dict__ is to + # call setattr(k, v) on the underlying class, which has the same + # effect. # There is one corner case: if the __dict__ class has itself a # "__dict__" key (this means that the class likely overrides the # __dict__ property of its instances), setattr("__dict__", v) will try From 8b001bea8cb02842b2a464ac2a8af19acc85bd9b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 12:10:33 +0200 Subject: [PATCH 04/16] FIX save __dict__ only if it was overridden. --- cloudpickle/cloudpickle.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 957a0ac36..9ba3c31cd 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -656,10 +656,11 @@ def save_dynamic_class(self, obj): # result, if it exists, the class __dict__ must contain its __dict__ # item when it is initialized and fed to the class reconstructor. __dict__ = clsdict.pop('__dict__', None) - if __dict__ is not None: - # Native pickle memoization of dict objects prevents us from - # reference cycles even if __dict__ is now saved before obj is - # memoized. + + # As __dict__ is part of obj's reconstructor args, __dict__ will be + # saved before obj is memoized. To prevent infinite recursion, we + # must make sure that __dict__ does not contain a reference to obj. + if getattr(__dict__, '__objclass__', None) is not obj: type_kwargs['__dict__'] = __dict__ save = self.save From 6b146c8897985d7f9ac86157ffc30fd1d37fdc83 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 12:12:35 +0200 Subject: [PATCH 05/16] CLN more explicit comment in the tests --- tests/cloudpickle_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 39518e113..eb5bd1cb1 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1880,9 +1880,11 @@ class A: with open(pickle_filename, "wb") as f: cloudpickle.dump(a, f, protocol=self.protocol) - # Depickle the class in a new python session to make sure the class is - # fully-recreated, and not looked-up in existing cloudpickle - # class-tracking constructs. + # Check that the dynamic class defintion is fully reconstructed in a + # new Python subprocess and check the assertion that the content of the + # __dict__ attribute has the expected content there. Testing this in + # the main process is not enough because the existing class definition + # could have been reused directly. child_process_script = """ import pickle with open("{filename}", "rb") as f: From 819bcf3a314cc9b89c436980b9ad7d3999a349f7 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 12:23:54 +0200 Subject: [PATCH 06/16] TST test other pickle filename --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index eb5bd1cb1..b5174a36b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1870,7 +1870,7 @@ def __getattr__(self, name): def test___dict__attribute_not_dropped_during_pickling(self): # Test https://github.com/cloudpipe/cloudpickle/issues/282. cloudpickle # used to drop __dict__ attributes of classes at pickling time. - pickle_filename = os.path.join(self.tmpdir, 'class_with___dict__.pkl') + pickle_filename= os.path.join(self.tmpdir, 'class_with_dict.pkl') _dict = {'some_attribute': 1} class A: __dict__ = _dict From ff598c70ab9d1984d05cd08c78e7b5606010c520 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 12:35:20 +0200 Subject: [PATCH 07/16] FIX python 2 --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b5174a36b..a59671a4a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1872,7 +1872,7 @@ def test___dict__attribute_not_dropped_during_pickling(self): # used to drop __dict__ attributes of classes at pickling time. pickle_filename= os.path.join(self.tmpdir, 'class_with_dict.pkl') _dict = {'some_attribute': 1} - class A: + class A(object): __dict__ = _dict a = A() self.assertEqual(a.__dict__, _dict) From f4eca77728475ff70ea59dafa608370fd06e96dd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 12:37:29 +0200 Subject: [PATCH 08/16] FIX windows compat --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index a59671a4a..2a3df1310 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1887,7 +1887,7 @@ class A(object): # could have been reused directly. child_process_script = """ import pickle - with open("{filename}", "rb") as f: + with open(r"{filename}", "rb") as f: depickled_a = pickle.load(f) assert depickled_a.__dict__ == {_dict}, depickled_a.__dict__ """.format(filename=pickle_filename, _dict=_dict) From 887eca547341c801c9fd17f3f6ff3bd8bed32e23 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 12:51:15 +0200 Subject: [PATCH 09/16] FIX update reducer for python3.8 --- cloudpickle/cloudpickle_fast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 8343bfa65..4c8b3c676 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -72,7 +72,7 @@ def _class_getnewargs(obj): type_kwargs["__slots__"] = obj.__slots__ __dict__ = obj.__dict__.get('__dict__', None) - if __dict__ is not None: + if getattr(__dict__, '__objclass__', None) is not obj: type_kwargs['__dict__'] = __dict__ return (type(obj), obj.__name__, obj.__bases__, type_kwargs, From 5914c7e52a039fc0a77247aca84fab0be91e5c17 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 13:34:26 +0200 Subject: [PATCH 10/16] CLN more explicit documentation and code --- cloudpickle/cloudpickle.py | 14 +++++++++++--- cloudpickle/cloudpickle_fast.py | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 9ba3c31cd..986198d08 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -658,9 +658,17 @@ def save_dynamic_class(self, obj): __dict__ = clsdict.pop('__dict__', None) # As __dict__ is part of obj's reconstructor args, __dict__ will be - # saved before obj is memoized. To prevent infinite recursion, we - # must make sure that __dict__ does not contain a reference to obj. - if getattr(__dict__, '__objclass__', None) is not obj: + # saved before obj is memoized. Thus, we must make sure that cyclic + # references between __dict__ and obj will not trigger infinite + # recursion, i.e that __dict__ is memoized before its populated with + # obj. If __dict__ is a dict, references to obj inside __dict__ are + # safe. But if __dict__ is not overriden, it is a getset_descriptor + # that contains an unsafe reference to obj, and we must not save it. + # Because non-overriden __dict__ attributes will be generated + # automatically at class reconstruction anyways, no information is + # lost. + if (not isinstance(__dict__, types.GetSetDescriptorType) and + getattr(__dict__, '__objclass__', None) is not obj): type_kwargs['__dict__'] = __dict__ save = self.save diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 4c8b3c676..5e4a20a31 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -72,7 +72,8 @@ def _class_getnewargs(obj): type_kwargs["__slots__"] = obj.__slots__ __dict__ = obj.__dict__.get('__dict__', None) - if getattr(__dict__, '__objclass__', None) is not obj: + if (not isinstance(__dict__, types.GetSetDescriptorType) and + getattr(__dict__, '__objclass__', None) is not obj): type_kwargs['__dict__'] = __dict__ return (type(obj), obj.__name__, obj.__bases__, type_kwargs, From 8f3cdea488146093c2287ef307883a2286d597dc Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 13:54:20 +0200 Subject: [PATCH 11/16] [ci downstream] From 447f6aa79bffffac18c346706695535d9fc8b57f Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 14:13:44 +0200 Subject: [PATCH 12/16] FIX add guard if no __dict__ --- cloudpickle/cloudpickle.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 986198d08..e924b7330 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -667,7 +667,8 @@ def save_dynamic_class(self, obj): # Because non-overriden __dict__ attributes will be generated # automatically at class reconstruction anyways, no information is # lost. - if (not isinstance(__dict__, types.GetSetDescriptorType) and + if (__dict__ is not None and + not isinstance(__dict__, types.GetSetDescriptorType) and getattr(__dict__, '__objclass__', None) is not obj): type_kwargs['__dict__'] = __dict__ From 2e925f8b05c249c75726b042dee9b81038db97f0 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 14:14:15 +0200 Subject: [PATCH 13/16] [ci downstream] From 7cb3167efbe7f2ab272b93c5fadbce39eddfc878 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Jun 2019 14:46:02 +0200 Subject: [PATCH 14/16] [ci downstream] From 9060d782d7027728b38adff1538cfe473d2483a1 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 18 Jun 2019 16:04:22 +0200 Subject: [PATCH 15/16] Update cloudpickle/cloudpickle.py Co-Authored-By: Olivier Grisel --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e924b7330..ea3db1ebe 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -647,7 +647,7 @@ def save_dynamic_class(self, obj): # updated during class re-hydratation. # However, a class __dict__ is read-only, and does not support direct # item assignement. Instead, the way to update a class __dict__ is to - # call setattr(k, v) on the underlying class, which has the same + # call setattr(klass, k, v) on the underlying class, which has the same # effect. # There is one corner case: if the __dict__ class has itself a # "__dict__" key (this means that the class likely overrides the From 62e1873c9cda68ebca2e39902d10b1fd2839707b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 18 Jun 2019 16:04:37 +0200 Subject: [PATCH 16/16] Update cloudpickle/cloudpickle.py Co-Authored-By: Olivier Grisel --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index ea3db1ebe..cd6bd2faa 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -651,7 +651,7 @@ def save_dynamic_class(self, obj): # effect. # There is one corner case: if the __dict__ class has itself a # "__dict__" key (this means that the class likely overrides the - # __dict__ property of its instances), setattr("__dict__", v) will try + # __dict__ property of its instances), setattr(klass, "__dict__", v) will try # to modify the read-only class __dict__ instead, and fail. As a # result, if it exists, the class __dict__ must contain its __dict__ # item when it is initialized and fed to the class reconstructor.