From f6707042d8b18d2a3737380bf58ff020872fb07b Mon Sep 17 00:00:00 2001
From: Bruce Adams
Date: Mon, 27 Nov 2023 17:27:43 -0500
Subject: [PATCH 1/2] Unit test for string containing multi-byte UTF-8

There are two tests here. One demonstrating existing, correct behavior
for `data=bytes`, and another, failing, test for the case where
`data=string` and the string contains multi-byte UTF-8.
---
 tests/test_requests.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/test_requests.py b/tests/test_requests.py
index a71fe7d6b8..b6fb84d1bd 100644
--- a/tests/test_requests.py
+++ b/tests/test_requests.py
@@ -1808,6 +1808,23 @@ def test_autoset_header_values_are_native(self, httpbin):
 
         assert p.headers["Content-Length"] == length
 
+    def test_content_length_for_bytes_data(self, httpbin):
+        data = "This is a string containing multi-byte UTF-8 ☃️"
+        encoded_data = data.encode("utf-8")
+        length = str(len(encoded_data))
+        req = requests.Request("POST", httpbin("post"), data=encoded_data)
+        p = req.prepare()
+
+        assert p.headers["Content-Length"] == length
+
+    def test_content_length_for_string_data_counts_bytes(self, httpbin):
+        data = "This is a string containing multi-byte UTF-8 ☃️"
+        length = str(len(data.encode("utf-8")))
+        req = requests.Request("POST", httpbin("post"), data=data)
+        p = req.prepare()
+
+        assert p.headers["Content-Length"] == length
+
     def test_nonhttp_schemes_dont_check_URLs(self):
         test_urls = (
             "data:image/gif;base64,R0lGODlhAQABAHAAACH5BAUAAAAALAAAAAABAAEAAAICRAEAOw==",

From 3fd309a5c14e4cfbd96bea6c8e71b4958fe090bb Mon Sep 17 00:00:00 2001
From: Bruce Adams
Date: Tue, 28 Nov 2023 13:17:49 -0500
Subject: [PATCH 2/2] Enhance `super_len` to count encoded bytes for str

This fixes issue #6586
---
 src/requests/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/requests/utils.py b/src/requests/utils.py
index c3b123ea4e..a603a8638c 100644
--- a/src/requests/utils.py
+++ b/src/requests/utils.py
@@ -134,6 +134,9 @@ def super_len(o):
     total_length = None
     current_position = 0
 
+    if isinstance(o, str):
+        o = o.encode("utf-8")
+
     if hasattr(o, "__len__"):
         total_length = len(o)
 