Skip to content

Commit

Permalink
Add fallback decoding for asgi headers (#2837)
Browse files Browse the repository at this point in the history
* Add latin-1 fallback decoding for asgi headers

* Add comment for ASGI encoding spec and change to unicode_escape

* add unit test for non-utf8 header decoding

* add changelog

* revert lint

* code review changes

* Fix changelog

* Add ASGIGetter test

---------

Co-authored-by: Emídio Neto <[email protected]>
Co-authored-by: Riccardo Magliocchetti <[email protected]>
  • Loading branch information
3 people authored Sep 25, 2024
1 parent 3deb6b9 commit a084c2c
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#2537](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2537))
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-fastapi` Add ability to disable internal HTTP send and receive spans
([#2802](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2802))
- `opentelemetry-instrumentation-asgi` Add fallback decoding for ASGI headers
([#2837](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2837))

### Breaking changes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,17 +284,17 @@ def get(
# ASGI header keys are in lower case
key = key.lower()
decoded = [
_value.decode("utf8")
_decode_header_item(_value)
for (_key, _value) in headers
if _key.decode("utf8").lower() == key
if _decode_header_item(_key).lower() == key
]
if not decoded:
return None
return decoded

def keys(self, carrier: dict) -> typing.List[str]:
headers = carrier.get("headers") or []
return [_key.decode("utf8") for (_key, _value) in headers]
return [_decode_header_item(_key) for (_key, _value) in headers]


asgi_getter = ASGIGetter()
Expand Down Expand Up @@ -410,7 +410,9 @@ def collect_custom_headers_attributes(
if raw_headers:
for key, value in raw_headers:
# Decode headers before processing.
headers[key.decode()].append(value.decode())
headers[_decode_header_item(key)].append(
_decode_header_item(value)
)

return sanitize.sanitize_header_values(
headers,
Expand Down Expand Up @@ -979,3 +981,13 @@ def _parse_active_request_count_attrs(
_server_active_requests_count_attrs_new,
sem_conv_opt_in_mode,
)


def _decode_header_item(value):
try:
return value.decode("utf-8")
except ValueError:
# ASGI header encoding specs, see:
# - https://asgi.readthedocs.io/en/latest/specs/www.html#wsgi-encoding-differences (see: WSGI encoding differences)
# - https://docs.python.org/3/library/codecs.html#text-encodings (see: Text Encodings)
return value.decode("unicode_escape")
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ async def http_app_with_custom_headers(scope, receive, send):
b"my-custom-regex-value-3,my-custom-regex-value-4",
),
(b"my-secret-header", b"my-secret-value"),
(
b"non-utf8-header",
b"Moto Z\xb2",
),
(
b"Moto-Z\xb2-non-utf8-header-key",
b"Moto Z\xb2",
),
],
}
)
Expand Down Expand Up @@ -130,6 +138,14 @@ async def test_http_custom_request_headers_in_span_attributes(self):
(b"Regex-Test-Header-1", b"Regex Test Value 1"),
(b"regex-test-header-2", b"RegexTestValue2,RegexTestValue3"),
(b"My-Secret-Header", b"My Secret Value"),
(
b"non-utf8-header",
b"Moto Z\xb2",
),
(
b"Moto-Z\xb2-non-utf8-header-key",
b"Moto Z\xb2",
),
]
)
self.seed_app(self.app)
Expand All @@ -147,6 +163,8 @@ async def test_http_custom_request_headers_in_span_attributes(self):
"http.request.header.regex_test_header_2": (
"RegexTestValue2,RegexTestValue3",
),
"http.request.header.non_utf8_header": ("Moto Z²",),
"http.request.header.moto_z²_non_utf8_header_key": ("Moto Z²",),
"http.request.header.my_secret_header": ("[REDACTED]",),
}
for span in span_list:
Expand Down Expand Up @@ -223,6 +241,8 @@ async def test_http_custom_response_headers_in_span_attributes(self):
"my-custom-regex-value-3,my-custom-regex-value-4",
),
"http.response.header.my_secret_header": ("[REDACTED]",),
"http.response.header.non_utf8_header": ("Moto Z²",),
"http.response.header.moto_z²_non_utf8_header_key": ("Moto Z²",),
}
for span in span_list:
if span.kind == SpanKind.SERVER:
Expand Down Expand Up @@ -418,8 +438,8 @@ async def test_websocket_custom_response_headers_not_in_span_attributes(


SANITIZE_FIELDS_TEST_VALUE = ".*my-secret.*"
SERVER_REQUEST_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*"
SERVER_RESPONSE_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*"
SERVER_REQUEST_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*,non-utf8-header,Moto-Z²-non-utf8-header-key"
SERVER_RESPONSE_TEST_VALUE = "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*,non-utf8-header,Moto-Z²-non-utf8-header-key"


class TestCustomHeadersEnv(TestCustomHeaders):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,13 @@ def test_keys(self):
expected_val,
"Should be equal",
)

def test_non_utf8_headers(self):
    """ASGIGetter.get decodes a header value holding the non-UTF-8 byte 0xB2."""
    # 0xB2 on its own is not a valid UTF-8 sequence; the getter is expected
    # to still return a decoded string for it.
    raw_headers = [(b"test-key", b"Moto Z\xb2")]
    decoded = ASGIGetter().get({"headers": raw_headers}, "test-key")
    self.assertEqual(decoded, ["Moto Z²"], "Should be equal")

0 comments on commit a084c2c

Please sign in to comment.