From e23b5bcd29675cb05726eb1540a8a61397af7dde Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Thu, 21 Mar 2024 21:55:10 +0100 Subject: [PATCH 1/3] feat(parse_header): provide our own implementation of `parse_header()` --- README.rst | 2 +- docs/api/util.rst | 5 ++ docs/user/recipes/pretty-json.rst | 3 +- falcon/__init__.py | 1 + falcon/asgi/multipart.py | 5 +- falcon/media/multipart.py | 10 ++-- falcon/testing/helpers.py | 4 +- falcon/util/__init__.py | 1 + falcon/util/mediatypes.py | 89 ++++++++++++++++++++++++++++ falcon/vendor/mimeparse/mimeparse.py | 4 +- pyproject.toml | 1 - tests/test_mediatypes.py | 33 +++++++++++ 12 files changed, 142 insertions(+), 16 deletions(-) create mode 100644 falcon/util/mediatypes.py create mode 100644 tests/test_mediatypes.py diff --git a/README.rst b/README.rst index 26105b0dd..738b2f2b8 100644 --- a/README.rst +++ b/README.rst @@ -1027,7 +1027,7 @@ See also: `CONTRIBUTING.md `: .. code:: python - import cgi import json import falcon @@ -66,7 +65,7 @@ implemented with a :ref:`custom media handler `: return json.loads(data.decode()) def serialize(self, media, content_type): - _, params = cgi.parse_header(content_type) + _, params = falcon.parse_header(content_type) indent = params.get('indent') if indent is not None: try: diff --git a/falcon/__init__.py b/falcon/__init__.py index 1d33539b3..745856058 100644 --- a/falcon/__init__.py +++ b/falcon/__init__.py @@ -77,6 +77,7 @@ from falcon.util import IS_64_BITS from falcon.util import is_python_func from falcon.util import misc +from falcon.util import parse_header from falcon.util import reader from falcon.util import runs_sync from falcon.util import secure_filename diff --git a/falcon/asgi/multipart.py b/falcon/asgi/multipart.py index b268c2b5a..52cb0505e 100644 --- a/falcon/asgi/multipart.py +++ b/falcon/asgi/multipart.py @@ -14,11 +14,10 @@ """ASGI multipart form media handler components.""" -import cgi - from falcon.asgi.reader import BufferedReader from falcon.errors 
import DelimiterError from falcon.media import multipart +from falcon.util.mediatypes import parse_header _ALLOWED_CONTENT_HEADERS = multipart._ALLOWED_CONTENT_HEADERS _CRLF = multipart._CRLF @@ -54,7 +53,7 @@ async def get_media(self): return self._media async def get_text(self): - content_type, options = cgi.parse_header(self.content_type) + content_type, options = parse_header(self.content_type) if content_type != 'text/plain': return None diff --git a/falcon/media/multipart.py b/falcon/media/multipart.py index c3fc37d56..5b55d4b4f 100644 --- a/falcon/media/multipart.py +++ b/falcon/media/multipart.py @@ -14,7 +14,6 @@ """Multipart form media handler.""" -import cgi import re from urllib.parse import unquote_to_bytes @@ -24,6 +23,7 @@ from falcon.stream import BoundedStream from falcon.util import BufferedReader from falcon.util import misc +from falcon.util.mediatypes import parse_header # TODO(vytas): @@ -249,7 +249,7 @@ def get_text(self): str: The part decoded as a text string provided the part is encoded as ``text/plain``, ``None`` otherwise. 
""" - content_type, options = cgi.parse_header(self.content_type) + content_type, options = parse_header(self.content_type) if content_type != 'text/plain': return None @@ -275,7 +275,7 @@ def filename(self): if self._content_disposition is None: value = self._headers.get(b'content-disposition', b'') - self._content_disposition = cgi.parse_header(value.decode()) + self._content_disposition = parse_header(value.decode()) _, params = self._content_disposition @@ -311,7 +311,7 @@ def name(self): if self._content_disposition is None: value = self._headers.get(b'content-disposition', b'') - self._content_disposition = cgi.parse_header(value.decode()) + self._content_disposition = parse_header(value.decode()) _, params = self._content_disposition self._name = params.get('name') @@ -493,7 +493,7 @@ def __init__(self, parse_options=None): def _deserialize_form( self, stream, content_type, content_length, form_cls=MultipartForm ): - _, options = cgi.parse_header(content_type) + _, options = parse_header(content_type) try: boundary = options['boundary'] except KeyError: diff --git a/falcon/testing/helpers.py b/falcon/testing/helpers.py index 00959495a..39e8c12f8 100644 --- a/falcon/testing/helpers.py +++ b/falcon/testing/helpers.py @@ -23,7 +23,6 @@ """ import asyncio -import cgi from collections import defaultdict from collections import deque import contextlib @@ -51,6 +50,7 @@ from falcon.constants import SINGLETON_HEADERS import falcon.request from falcon.util import uri +from falcon.util.mediatypes import parse_header # NOTE(kgriffs): Changed in 3.0 from 'curl/7.24.0 (x86_64-apple-darwin12.0)' DEFAULT_UA = 'falcon-client/' + falcon.__version__ @@ -802,7 +802,7 @@ def get_encoding_from_headers(headers): if not content_type: return None - content_type, params = cgi.parse_header(content_type) + content_type, params = parse_header(content_type) if 'charset' in params: return params['charset'].strip('\'"') diff --git a/falcon/util/__init__.py b/falcon/util/__init__.py index 
3fec8b06e..1cead07e8 100644 --- a/falcon/util/__init__.py +++ b/falcon/util/__init__.py @@ -29,6 +29,7 @@ from falcon.util.deprecation import deprecated from falcon.util.deprecation import deprecated_args from falcon.util.deprecation import DeprecatedWarning +from falcon.util.mediatypes import parse_header from falcon.util.misc import code_to_http_status from falcon.util.misc import dt_to_http from falcon.util.misc import get_argnames diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py new file mode 100644 index 000000000..536fd19cf --- /dev/null +++ b/falcon/util/mediatypes.py @@ -0,0 +1,89 @@ +# Copyright 2023-2024 by Vytautas Liuolia. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Media (aka MIME) type parsing and matching utilities.""" + +import typing + + +def _parse_header_old_stdlib(line): # type: ignore + """Parse a Content-type like header. + + Return the main content-type and a dictionary of options. + + Note: + This method has been copied (almost) verbatim from CPython 3.8 stdlib. + It is slated for removal from the stdlib in 3.13. 
+ """ + + def _parseparam(s): # type: ignore + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] + + parts = _parseparam(';' + line) + key = parts.__next__() + pdict = {} + for p in parts: + i = p.find('=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1 :].strip() + if len(value) >= 2 and value[0] == value[-1] == '"': + value = value[1:-1] + value = value.replace('\\\\', '\\').replace('\\"', '"') + pdict[name] = value + return key, pdict + + +def parse_header(line: str) -> typing.Tuple[str, dict]: + """Parse a Content-type like header. + + Return the main content-type and a dictionary of options. + + Args: + line: A header value to parse. + + Returns: + A tuple containing the main content-type and a dictionary of options. + + Note: + This function replaces an equivalent method previously available in the + stdlib as ``cgi.parse_header()``. + It was removed from the stdlib in Python 3.13. 
+ """ + if '"' not in line and '\\' not in line: + key, semicolon, parts = line.partition(';') + if not semicolon: + return (key.strip(), {}) + + pdict = {} + for part in parts.split(';'): + name, equals, value = part.partition('=') + if equals: + pdict[name.strip().lower()] = value.strip() + + return (key.strip(), pdict) + + return _parse_header_old_stdlib(line) + + +__all__ = ['parse_header'] diff --git a/falcon/vendor/mimeparse/mimeparse.py b/falcon/vendor/mimeparse/mimeparse.py index 0218553cf..f96e63384 100755 --- a/falcon/vendor/mimeparse/mimeparse.py +++ b/falcon/vendor/mimeparse/mimeparse.py @@ -1,4 +1,4 @@ -import cgi +from falcon.util.mediatypes import parse_header __version__ = '1.6.0' __author__ = 'Joe Gregorio' @@ -23,7 +23,7 @@ def parse_mime_type(mime_type): :rtype: (str,str,dict) """ - full_type, params = cgi.parse_header(mime_type) + full_type, params = parse_header(mime_type) # Java URLConnection class sends an Accept header that includes a # single '*'. Turn it into a legal wildcard. if full_type == '*': diff --git a/pyproject.toml b/pyproject.toml index 5ed0c5fab..44a829feb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,6 @@ filterwarnings = [ "ignore:Using or importing the ABCs:DeprecationWarning", "ignore:cannot collect test class 'TestClient':pytest.PytestCollectionWarning", "ignore:inspect.getargspec\\(\\) is deprecated:DeprecationWarning", - "ignore:.cgi. is deprecated and slated for removal:DeprecationWarning", "ignore:path is deprecated\\. 
Use files\\(\\) instead:DeprecationWarning", "ignore:This process \\(.+\\) is multi-threaded", ] diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py new file mode 100644 index 000000000..7b31f43ab --- /dev/null +++ b/tests/test_mediatypes.py @@ -0,0 +1,33 @@ +import pytest + +from falcon.util import mediatypes + + +@pytest.mark.parametrize( + 'value,expected', + [ + ('', ('', {})), + ('strange', ('strange', {})), + ('text/plain', ('text/plain', {})), + ('text/plain ', ('text/plain', {})), + (' text/plain', ('text/plain', {})), + (' text/plain ', ('text/plain', {})), + (' text/plain ', ('text/plain', {})), + ( + 'falcon/peregrine; key1; key2=value; key3', + ('falcon/peregrine', {'key2': 'value'}), + ), + ('"falcon/peregrine" ; key="value"', ('"falcon/peregrine"', {'key': 'value'})), + ('falcon/peregrine; empty=""', ('falcon/peregrine', {'empty': ''})), + ('falcon/peregrine; quote="', ('falcon/peregrine', {'quote': '"'})), + ('text/plain; charset=utf-8', ('text/plain', {'charset': 'utf-8'})), + ('stuff/strange; missing-value; missing-another', ('stuff/strange', {})), + ('stuff/strange; missing-value\\missing-another', ('stuff/strange', {})), + ( + 'application/falcon; P1 = "key; value"; P2="\\""', + ('application/falcon', {'p1': 'key; value', 'p2': '"'}), + ), + ], +) +def test_parse_header(value, expected): + assert mediatypes.parse_header(value) == expected From 26dc8c91a60c91bfa3bc1a32aa5dd4b3645da618 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Thu, 21 Mar 2024 23:47:53 +0100 Subject: [PATCH 2/3] docs(newsfragments): add a newsfragment + address 1 review comment --- docs/_newsfragments/2066.newandimproved.rst | 4 +++ falcon/util/mediatypes.py | 30 ++++++++++----------- 2 files changed, 19 insertions(+), 15 deletions(-) create mode 100644 docs/_newsfragments/2066.newandimproved.rst diff --git a/docs/_newsfragments/2066.newandimproved.rst b/docs/_newsfragments/2066.newandimproved.rst new file mode 100644 index 000000000..8bbee4797 --- 
/dev/null +++ b/docs/_newsfragments/2066.newandimproved.rst @@ -0,0 +1,4 @@ +In Python 3.13, the ``cgi`` module is removed entirely from the stdlib, +including its ``parse_header()`` function. Falcon addresses the issue by shipping +its own implementation; :func:`falcon.parse_header` can also be used in your projects +affected by the removal. diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 536fd19cf..c0dca5121 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -17,6 +17,19 @@ import typing +def _parse_param_old_stdlib(s): # type: ignore + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] + + def _parse_header_old_stdlib(line): # type: ignore """Parse a Content-type like header. @@ -26,20 +39,7 @@ def _parse_header_old_stdlib(line): # type: ignore This method has been copied (almost) verbatim from CPython 3.8 stdlib. It is slated for removal from the stdlib in 3.13. """ - - def _parseparam(s): # type: ignore - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - - parts = _parseparam(';' + line) + parts = _parse_param_old_stdlib(';' + line) key = parts.__next__() pdict = {} for p in parts: @@ -63,7 +63,7 @@ def parse_header(line: str) -> typing.Tuple[str, dict]: line: A header value to parse. Returns: - A tuple containing the main content-type and a dictionary of options. + tuple: (the main content-type, dictionary of options). 
Note: This function replaces an equivalent method previously available in the From e209b2098ddd4cee8364136574ebfcbae874385b Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Wed, 3 Apr 2024 22:05:04 +0200 Subject: [PATCH 3/3] test(test_mediatypes.py): add tests for multiple parameters --- tests/test_mediatypes.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index 7b31f43ab..0fae79b43 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -17,6 +17,14 @@ 'falcon/peregrine; key1; key2=value; key3', ('falcon/peregrine', {'key2': 'value'}), ), + ( + 'audio/pcm;rate=48000;encoding=float;bits=32', + ('audio/pcm', {'bits': '32', 'encoding': 'float', 'rate': '48000'}), + ), + ( + 'falcon/*; genus=falco; family=falconidae; class=aves; ', + ('falcon/*', {'class': 'aves', 'family': 'falconidae', 'genus': 'falco'}), + ), ('"falcon/peregrine" ; key="value"', ('"falcon/peregrine"', {'key': 'value'})), ('falcon/peregrine; empty=""', ('falcon/peregrine', {'empty': ''})), ('falcon/peregrine; quote="', ('falcon/peregrine', {'quote': '"'})),