From 527728db774c9600821df76af6a78e53bca8d316 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Mon, 17 Jun 2024 22:23:53 +0800 Subject: [PATCH] Add option to skip password when serializing filesystem (#1625) --- fsspec/json.py | 6 ++++-- fsspec/spec.py | 41 +++++++++++++++++++++++++++++++++++---- fsspec/tests/test_spec.py | 14 +++++++++++++ 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/fsspec/json.py b/fsspec/json.py index 3d1e47353..81d1f3a91 100644 --- a/fsspec/json.py +++ b/fsspec/json.py @@ -1,16 +1,18 @@ import json from contextlib import suppress from pathlib import PurePath -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any, Callable, ClassVar, Dict, List, Optional, Tuple from .registry import _import_class, get_filesystem_class from .spec import AbstractFileSystem class FilesystemJSONEncoder(json.JSONEncoder): + include_password: ClassVar[bool] = True + def default(self, o: Any) -> Any: if isinstance(o, AbstractFileSystem): - return o.to_dict() + return o.to_dict(include_password=self.include_password) if isinstance(o, PurePath): cls = type(o) return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)} diff --git a/fsspec/spec.py b/fsspec/spec.py index 951e9a2f9..edc7abfc5 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -1386,20 +1386,38 @@ def read_block(self, fn, offset, length, delimiter=None): length = size - offset return read_block(f, offset, length, delimiter) - def to_json(self) -> str: + def to_json(self, *, include_password: bool = True) -> str: """ JSON representation of this filesystem instance. + Parameters + ---------- + include_password: bool, default True + Whether to include the password (if any) in the output. + Returns ------- JSON string with keys ``cls`` (the python location of this class), protocol (text name of this class's protocol, first one in case of multiple), ``args`` (positional args, usually empty), and all other keyword arguments as their own keys. 
+ + Warnings + -------- + Serialized filesystems may contain sensitive information which has been + passed to the constructor, such as passwords and tokens. Make sure you + store and send them in a secure environment! """ from .json import FilesystemJSONEncoder - return json.dumps(self, cls=FilesystemJSONEncoder) + return json.dumps( + self, + cls=type( + "_FilesystemJSONEncoder", + (FilesystemJSONEncoder,), + {"include_password": include_password}, + ), + ) @staticmethod def from_json(blob: str) -> AbstractFileSystem: @@ -1426,25 +1444,40 @@ def from_json(blob: str) -> AbstractFileSystem: return json.loads(blob, cls=FilesystemJSONDecoder) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]: """ JSON-serializable dictionary representation of this filesystem instance. + Parameters + ---------- + include_password: bool, default True + Whether to include the password (if any) in the output. + Returns ------- Dictionary with keys ``cls`` (the python location of this class), protocol (text name of this class's protocol, first one in case of multiple), ``args`` (positional args, usually empty), and all other keyword arguments as their own keys. + + Warnings + -------- + Serialized filesystems may contain sensitive information which has been + passed to the constructor, such as passwords and tokens. Make sure you + store and send them in a secure environment! 
""" cls = type(self) proto = self.protocol + storage_options = dict(self.storage_options) + if not include_password: + storage_options.pop("password", None) + return dict( cls=f"{cls.__module__}:{cls.__name__}", protocol=proto[0] if isinstance(proto, (tuple, list)) else proto, args=self.storage_args, - **self.storage_options, + **storage_options, ) @staticmethod diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index 32db78c2b..2bb948b75 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -912,6 +912,20 @@ def test_dict_idempotent(): assert DummyTestFS.from_dict(outa) is a +def test_serialize_no_password(): + fs = DummyTestFS(1, password="admin") + + assert "password" not in fs.to_json(include_password=False) + assert "password" not in fs.to_dict(include_password=False) + + +def test_serialize_with_password(): + fs = DummyTestFS(1, password="admin") + + assert "password" in fs.to_json(include_password=True) + assert "password" in fs.to_dict(include_password=True) + + def test_from_dict_valid(): fs = DummyTestFS.from_dict({"cls": "fsspec.tests.test_spec.DummyTestFS"}) assert isinstance(fs, DummyTestFS)