Skip to content

Commit

Permalink
feat(parse_header): provide our own implementation of parse_header()
Browse files Browse the repository at this point in the history
  • Loading branch information
vytas7 committed Mar 21, 2024
1 parent a78cfb3 commit e23b5bc
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,7 @@ See also: `CONTRIBUTING.md <https://github.com/falconry/falcon/blob/master/CONTR
Legal
-----

Copyright 2013-2023 by Individual and corporate contributors as
Copyright 2013-2024 by Individual and corporate contributors as
noted in the individual source files.

Licensed under the Apache License, Version 2.0 (the "License"); you may
Expand Down
5 changes: 5 additions & 0 deletions docs/api/util.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ HTTP Status
.. autofunction:: falcon.code_to_http_status
.. autofunction:: falcon.get_http_status

Media types
-----------

.. autofunction:: falcon.parse_header

Async
-----

Expand Down
3 changes: 1 addition & 2 deletions docs/user/recipes/pretty-json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ implemented with a :ref:`custom media handler <custom-media-handler-type>`:

.. code:: python
import cgi
import json
import falcon
Expand All @@ -66,7 +65,7 @@ implemented with a :ref:`custom media handler <custom-media-handler-type>`:
return json.loads(data.decode())
def serialize(self, media, content_type):
_, params = cgi.parse_header(content_type)
_, params = falcon.parse_header(content_type)
indent = params.get('indent')
if indent is not None:
try:
Expand Down
1 change: 1 addition & 0 deletions falcon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
from falcon.util import IS_64_BITS
from falcon.util import is_python_func
from falcon.util import misc
from falcon.util import parse_header
from falcon.util import reader
from falcon.util import runs_sync
from falcon.util import secure_filename
Expand Down
5 changes: 2 additions & 3 deletions falcon/asgi/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@

"""ASGI multipart form media handler components."""

import cgi

from falcon.asgi.reader import BufferedReader
from falcon.errors import DelimiterError
from falcon.media import multipart
from falcon.util.mediatypes import parse_header

_ALLOWED_CONTENT_HEADERS = multipart._ALLOWED_CONTENT_HEADERS
_CRLF = multipart._CRLF
Expand Down Expand Up @@ -54,7 +53,7 @@ async def get_media(self):
return self._media

async def get_text(self):
content_type, options = cgi.parse_header(self.content_type)
content_type, options = parse_header(self.content_type)

Check warning on line 56 in falcon/asgi/multipart.py

View check run for this annotation

Codecov / codecov/patch

falcon/asgi/multipart.py#L56

Added line #L56 was not covered by tests
if content_type != 'text/plain':
return None

Expand Down
10 changes: 5 additions & 5 deletions falcon/media/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

"""Multipart form media handler."""

import cgi
import re
from urllib.parse import unquote_to_bytes

Expand All @@ -24,6 +23,7 @@
from falcon.stream import BoundedStream
from falcon.util import BufferedReader
from falcon.util import misc
from falcon.util.mediatypes import parse_header


# TODO(vytas):
Expand Down Expand Up @@ -249,7 +249,7 @@ def get_text(self):
str: The part decoded as a text string provided the part is
encoded as ``text/plain``, ``None`` otherwise.
"""
content_type, options = cgi.parse_header(self.content_type)
content_type, options = parse_header(self.content_type)

Check warning on line 252 in falcon/media/multipart.py

View check run for this annotation

Codecov / codecov/patch

falcon/media/multipart.py#L252

Added line #L252 was not covered by tests
if content_type != 'text/plain':
return None

Expand All @@ -275,7 +275,7 @@ def filename(self):

if self._content_disposition is None:
value = self._headers.get(b'content-disposition', b'')
self._content_disposition = cgi.parse_header(value.decode())
self._content_disposition = parse_header(value.decode())

Check warning on line 278 in falcon/media/multipart.py

View check run for this annotation

Codecov / codecov/patch

falcon/media/multipart.py#L278

Added line #L278 was not covered by tests

_, params = self._content_disposition

Expand Down Expand Up @@ -311,7 +311,7 @@ def name(self):

if self._content_disposition is None:
value = self._headers.get(b'content-disposition', b'')
self._content_disposition = cgi.parse_header(value.decode())
self._content_disposition = parse_header(value.decode())

Check warning on line 314 in falcon/media/multipart.py

View check run for this annotation

Codecov / codecov/patch

falcon/media/multipart.py#L314

Added line #L314 was not covered by tests

_, params = self._content_disposition
self._name = params.get('name')
Expand Down Expand Up @@ -493,7 +493,7 @@ def __init__(self, parse_options=None):
def _deserialize_form(
self, stream, content_type, content_length, form_cls=MultipartForm
):
_, options = cgi.parse_header(content_type)
_, options = parse_header(content_type)

Check warning on line 496 in falcon/media/multipart.py

View check run for this annotation

Codecov / codecov/patch

falcon/media/multipart.py#L496

Added line #L496 was not covered by tests
try:
boundary = options['boundary']
except KeyError:
Expand Down
4 changes: 2 additions & 2 deletions falcon/testing/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
"""

import asyncio
import cgi
from collections import defaultdict
from collections import deque
import contextlib
Expand Down Expand Up @@ -51,6 +50,7 @@
from falcon.constants import SINGLETON_HEADERS
import falcon.request
from falcon.util import uri
from falcon.util.mediatypes import parse_header

# NOTE(kgriffs): Changed in 3.0 from 'curl/7.24.0 (x86_64-apple-darwin12.0)'
DEFAULT_UA = 'falcon-client/' + falcon.__version__
Expand Down Expand Up @@ -802,7 +802,7 @@ def get_encoding_from_headers(headers):
if not content_type:
return None

content_type, params = cgi.parse_header(content_type)
content_type, params = parse_header(content_type)

Check warning on line 805 in falcon/testing/helpers.py

View check run for this annotation

Codecov / codecov/patch

falcon/testing/helpers.py#L805

Added line #L805 was not covered by tests

if 'charset' in params:
return params['charset'].strip('\'"')
Expand Down
1 change: 1 addition & 0 deletions falcon/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from falcon.util.deprecation import deprecated
from falcon.util.deprecation import deprecated_args
from falcon.util.deprecation import DeprecatedWarning
from falcon.util.mediatypes import parse_header
from falcon.util.misc import code_to_http_status
from falcon.util.misc import dt_to_http
from falcon.util.misc import get_argnames
Expand Down
89 changes: 89 additions & 0 deletions falcon/util/mediatypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright 2023-2024 by Vytautas Liuolia.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Media (aka MIME) type parsing and matching utilities."""

import typing


def _parse_header_old_stdlib(line): # type: ignore
"""Parse a Content-type like header.
Return the main content-type and a dictionary of options.
Note:
This method has been copied (almost) verbatim from CPython 3.8 stdlib.
It is slated for removal from the stdlib in 3.13.
"""

def _parseparam(s): # type: ignore

Check warning on line 30 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L30

Added line #L30 was not covered by tests
while s[:1] == ';':
s = s[1:]
end = s.find(';')

Check warning on line 33 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L32-L33

Added lines #L32 - L33 were not covered by tests
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(';', end + 1)

Check warning on line 35 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L35

Added line #L35 was not covered by tests
if end < 0:
end = len(s)
f = s[:end]
yield f.strip()
s = s[end:]

Check warning on line 40 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L37-L40

Added lines #L37 - L40 were not covered by tests

parts = _parseparam(';' + line)
key = parts.__next__()
pdict = {}

Check warning on line 44 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L42-L44

Added lines #L42 - L44 were not covered by tests
for p in parts:
i = p.find('=')

Check warning on line 46 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L46

Added line #L46 was not covered by tests
if i >= 0:
name = p[:i].strip().lower()
value = p[i + 1 :].strip()

Check warning on line 49 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L48-L49

Added lines #L48 - L49 were not covered by tests
if len(value) >= 2 and value[0] == value[-1] == '"':
value = value[1:-1]
value = value.replace('\\\\', '\\').replace('\\"', '"')
pdict[name] = value
return key, pdict

Check warning on line 54 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L51-L54

Added lines #L51 - L54 were not covered by tests


def parse_header(line: str) -> typing.Tuple[str, dict]:
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    Args:
        line: A header value to parse.

    Returns:
        A tuple containing the main content-type and a dictionary of options.
        Option names are lowercased, and options without a value (no ``=``)
        are omitted.

    Note:
        This function replaces an equivalent function previously available in
        the stdlib as ``cgi.parse_header()``.
        It was removed from the stdlib in Python 3.13.
    """
    # Fast path: without double quotes or backslashes there is no quoting or
    # escaping to honor, so the value can simply be split on ';' and '='.
    if '"' not in line and '\\' not in line:
        key, semicolon, parts = line.partition(';')
        key = key.strip()
        if not semicolon:
            return (key, {})

        pdict = {}
        for part in parts.split(';'):
            name, equals, value = part.partition('=')
            if equals:
                pdict[name.strip().lower()] = value.strip()

        return (key, pdict)

    # Quoted or escaped content: defer to the quote-aware stdlib-derived parser.
    return _parse_header_old_stdlib(line)

Check warning on line 86 in falcon/util/mediatypes.py

View check run for this annotation

Codecov / codecov/patch

falcon/util/mediatypes.py#L86

Added line #L86 was not covered by tests


# Only parse_header is public; _parse_header_old_stdlib is an implementation
# detail and is deliberately not exported.
__all__ = ['parse_header']
4 changes: 2 additions & 2 deletions falcon/vendor/mimeparse/mimeparse.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import cgi
from falcon.util.mediatypes import parse_header

__version__ = '1.6.0'
__author__ = 'Joe Gregorio'
Expand All @@ -23,7 +23,7 @@ def parse_mime_type(mime_type):
:rtype: (str,str,dict)
"""
full_type, params = cgi.parse_header(mime_type)
full_type, params = parse_header(mime_type)
# Java URLConnection class sends an Accept header that includes a
# single '*'. Turn it into a legal wildcard.
if full_type == '*':
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ filterwarnings = [
"ignore:Using or importing the ABCs:DeprecationWarning",
"ignore:cannot collect test class 'TestClient':pytest.PytestCollectionWarning",
"ignore:inspect.getargspec\\(\\) is deprecated:DeprecationWarning",
"ignore:.cgi. is deprecated and slated for removal:DeprecationWarning",
"ignore:path is deprecated\\. Use files\\(\\) instead:DeprecationWarning",
"ignore:This process \\(.+\\) is multi-threaded",
]
Expand Down
33 changes: 33 additions & 0 deletions tests/test_mediatypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pytest

from falcon.util import mediatypes


@pytest.mark.parametrize(
    'value,expected',
    [
        # Bare values, with and without surrounding whitespace.
        ('', ('', {})),
        ('strange', ('strange', {})),
        ('text/plain', ('text/plain', {})),
        ('text/plain ', ('text/plain', {})),
        (' text/plain', ('text/plain', {})),
        (' text/plain ', ('text/plain', {})),
        ('   text/plain   ', ('text/plain', {})),
        # Options without '=' are dropped.
        (
            'falcon/peregrine; key1; key2=value; key3',
            ('falcon/peregrine', {'key2': 'value'}),
        ),
        # Quoted values: quotes are only removed from option values.
        ('"falcon/peregrine" ; key="value"', ('"falcon/peregrine"', {'key': 'value'})),
        ('falcon/peregrine; empty=""', ('falcon/peregrine', {'empty': ''})),
        ('falcon/peregrine; quote="', ('falcon/peregrine', {'quote': '"'})),
        ('text/plain; charset=utf-8', ('text/plain', {'charset': 'utf-8'})),
        ('stuff/strange; missing-value; missing-another', ('stuff/strange', {})),
        ('stuff/strange; missing-value\\missing-another', ('stuff/strange', {})),
        # Semicolons inside quotes and escaped quotes are preserved.
        (
            'application/falcon; P1 = "key; value"; P2="\\""',
            ('application/falcon', {'p1': 'key; value', 'p2': '"'}),
        ),
    ],
)
def test_parse_header(value, expected):
    """Check that parse_header() returns the expected (value, options) pair."""
    assert mediatypes.parse_header(value) == expected

0 comments on commit e23b5bc

Please sign in to comment.