From a084c2c7df030e5c9284a50bff1faced4adc36ce Mon Sep 17 00:00:00 2001 From: Rocky Ken <119533257+rocky-ken@users.noreply.github.com> Date: Wed, 25 Sep 2024 08:19:20 -0700 Subject: [PATCH] Add fallback decoding for asgi headers (#2837) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add latin-1 fallback decoding for asgi headers * Add comment for ASGI encoding spec and change to unicode_escape * add unit test for non-utf8 header decoding * add changelog * revert lint * code review changes * Fix changelog * Add ASGIGetter test --------- Co-authored-by: Emídio Neto <9735060+emdneto@users.noreply.github.com> Co-authored-by: Riccardo Magliocchetti --- CHANGELOG.md | 2 ++ .../instrumentation/asgi/__init__.py | 20 ++++++++++++---- .../tests/test_asgi_custom_headers.py | 24 +++++++++++++++++-- .../tests/test_getter.py | 10 ++++++++ 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fa1ab2ba3..a060517cdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#2537](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2537)) - `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-fastapi` Add ability to disable internal HTTP send and receive spans ([#2802](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2802)) +- `opentelemetry-instrumentation-asgi` Add fallback decoding for ASGI headers + ([#2837](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2837)) ### Breaking changes diff --git a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py index d25ca41017..bc45eacaa4 100644 --- 
a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py
+++ b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py
@@ -284,9 +284,9 @@ def get(
         # ASGI header keys are in lower case
         key = key.lower()
         decoded = [
-            _value.decode("utf8")
+            _decode_header_item(_value)
             for (_key, _value) in headers
-            if _key.decode("utf8").lower() == key
+            if _decode_header_item(_key).lower() == key
         ]
         if not decoded:
             return None
@@ -294,7 +294,7 @@ def get(
 
     def keys(self, carrier: dict) -> typing.List[str]:
         headers = carrier.get("headers") or []
-        return [_key.decode("utf8") for (_key, _value) in headers]
+        return [_decode_header_item(_key) for (_key, _value) in headers]
 
 
 asgi_getter = ASGIGetter()
@@ -410,7 +410,9 @@ def collect_custom_headers_attributes(
     if raw_headers:
         for key, value in raw_headers:
             # Decode headers before processing.
-            headers[key.decode()].append(value.decode())
+            headers[_decode_header_item(key)].append(
+                _decode_header_item(value)
+            )
 
     return sanitize.sanitize_header_values(
         headers,
@@ -979,3 +981,22 @@ def _parse_active_request_count_attrs(
         _server_active_requests_count_attrs_new,
         sem_conv_opt_in_mode,
     )
+
+
+def _decode_header_item(value):
+    """Decode a raw ASGI header name or value from ``bytes`` to ``str``.
+
+    UTF-8 is tried first. Input that is not valid UTF-8 is decoded as
+    Latin-1 instead, which maps every byte to exactly one code point, so
+    the fallback can neither fail nor alter the header content.
+    """
+    try:
+        return value.decode("utf-8")
+    except UnicodeDecodeError:
+        # Latin-1 rather than "unicode_escape": the latter rewrites backslash
+        # sequences found in the header and raises on a malformed one (for
+        # example a trailing backslash).
+        # ASGI header encoding specs, see:
+        # - https://asgi.readthedocs.io/en/latest/specs/www.html#wsgi-encoding-differences (see: WSGI encoding differences)
+        # - https://docs.python.org/3/library/codecs.html#text-encodings (see: Text Encodings)
+        return value.decode("latin-1")
diff --git a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_custom_headers.py b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_custom_headers.py
index f6cb05fbda..1b191e30e7 100644
--- a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_custom_headers.py
+++ 
b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_custom_headers.py @@ -48,6 +48,14 @@ async def http_app_with_custom_headers(scope, receive, send): b"my-custom-regex-value-3,my-custom-regex-value-4", ), (b"my-secret-header", b"my-secret-value"), + ( + b"non-utf8-header", + b"Moto Z\xb2", + ), + ( + b"Moto-Z\xb2-non-utf8-header-key", + b"Moto Z\xb2", + ), ], } ) @@ -130,6 +138,14 @@ async def test_http_custom_request_headers_in_span_attributes(self): (b"Regex-Test-Header-1", b"Regex Test Value 1"), (b"regex-test-header-2", b"RegexTestValue2,RegexTestValue3"), (b"My-Secret-Header", b"My Secret Value"), + ( + b"non-utf8-header", + b"Moto Z\xb2", + ), + ( + b"Moto-Z\xb2-non-utf8-header-key", + b"Moto Z\xb2", + ), ] ) self.seed_app(self.app) @@ -147,6 +163,8 @@ async def test_http_custom_request_headers_in_span_attributes(self): "http.request.header.regex_test_header_2": ( "RegexTestValue2,RegexTestValue3", ), + "http.request.header.non_utf8_header": ("Moto Z²",), + "http.request.header.moto_z²_non_utf8_header_key": ("Moto Z²",), "http.request.header.my_secret_header": ("[REDACTED]",), } for span in span_list: @@ -223,6 +241,8 @@ async def test_http_custom_response_headers_in_span_attributes(self): "my-custom-regex-value-3,my-custom-regex-value-4", ), "http.response.header.my_secret_header": ("[REDACTED]",), + "http.response.header.non_utf8_header": ("Moto Z²",), + "http.response.header.moto_z²_non_utf8_header_key": ("Moto Z²",), } for span in span_list: if span.kind == SpanKind.SERVER: @@ -418,8 +438,8 @@ async def test_websocket_custom_response_headers_not_in_span_attributes( SANITIZE_FIELDS_TEST_VALUE = ".*my-secret.*" -SERVER_REQUEST_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*" -SERVER_RESPONSE_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*" 
+SERVER_REQUEST_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*,non-utf8-header,Moto-Z²-non-utf8-header-key" +SERVER_RESPONSE_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*,non-utf8-header,Moto-Z²-non-utf8-header-key" class TestCustomHeadersEnv(TestCustomHeaders): diff --git a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_getter.py b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_getter.py index 26bb652b50..3f76e0e5ce 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_getter.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_getter.py @@ -69,3 +69,13 @@ def test_keys(self): expected_val, "Should be equal", ) + + def test_non_utf8_headers(self): + getter = ASGIGetter() + carrier = {"headers": [(b"test-key", b"Moto Z\xb2")]} + expected_val = ["Moto Z²"] + self.assertEqual( + getter.get(carrier, "test-key"), + expected_val, + "Should be equal", + )