From d9031eaad6b3a1885581789b9651a90548545a05 Mon Sep 17 00:00:00 2001 From: jonathan343 Date: Tue, 8 Oct 2024 15:38:09 -0400 Subject: [PATCH] Make CRC32 the default checksum and remove MD5 --- botocore/handlers.py | 19 +- botocore/httpchecksum.py | 72 +++--- botocore/utils.py | 83 +------ tests/functional/test_httpchecksum.py | 313 ++++++++++++++++++++++++++ tests/functional/test_s3.py | 114 ++++++---- tests/unit/test_handlers.py | 159 ------------- tests/unit/test_httpchecksum.py | 75 +++--- 7 files changed, 486 insertions(+), 349 deletions(-) create mode 100644 tests/functional/test_httpchecksum.py diff --git a/botocore/handlers.py b/botocore/handlers.py index 9cb1d052c0..6045328bc1 100644 --- a/botocore/handlers.py +++ b/botocore/handlers.py @@ -52,6 +52,7 @@ ParamValidationError, UnsupportedTLSVersionWarning, ) +from botocore.httpchecksum import DEFAULT_CHECKSUM_ALGORITHM from botocore.regions import EndpointResolverBuiltins from botocore.signers import ( add_generate_db_auth_token, @@ -61,8 +62,6 @@ from botocore.utils import ( SAFE_CHARS, ArnParser, - conditionally_calculate_checksum, - conditionally_calculate_md5, percent_encode, switch_host_with_param, ) @@ -1239,6 +1238,15 @@ def document_expires_shape(section, event_name, **kwargs): ) +def set_default_multipart_checksum_algorithm(params, **kwargs): + """Sets the ``ChecksumAlgorithm`` parameter to the SDKs default checksum. + + The ``CreateMultipartUpload`` operation isn't modeled with the ``httpchecksum`` + trait and requires us to set a default checksum algorithm when none is provided. + """ + params.setdefault('ChecksumAlgorithm', DEFAULT_CHECKSUM_ALGORITHM) + + # This is a list of (event_name, handler). # When a Session is created, everything in this list will be # automatically registered with that Session. @@ -1292,6 +1300,10 @@ def document_expires_shape(section, event_name, **kwargs): 'before-parameter-build.s3.CreateMultipartUpload', validate_ascii_metadata, ), + ( + 'before-parameter-build.s3.CreateMultipartUpload', + set_default_multipart_checksum_algorithm, + ), ('before-parameter-build.s3-control', remove_accid_host_prefix_from_model), ('docs.*.s3.CopyObject.complete-section', document_copy_source_form), ('docs.*.s3.UploadPartCopy.complete-section', document_copy_source_form), @@ -1302,10 +1314,7 @@ def document_expires_shape(section, event_name, **kwargs): ('before-call.s3', add_expect_header), ('before-call.glacier', add_glacier_version), ('before-call.apigateway', add_accept_header), - ('before-call.s3.PutObject', conditionally_calculate_checksum), - ('before-call.s3.UploadPart', conditionally_calculate_md5), ('before-call.s3.DeleteObjects', escape_xml_payload), - ('before-call.s3.DeleteObjects', conditionally_calculate_checksum), ('before-call.s3.PutBucketLifecycleConfiguration', escape_xml_payload), ('before-call.glacier.UploadArchive', add_glacier_checksums), ('before-call.glacier.UploadMultipartPart', add_glacier_checksums), diff --git a/botocore/httpchecksum.py b/botocore/httpchecksum.py index 8056b2bb18..2678e414d9 100644 --- a/botocore/httpchecksum.py +++ b/botocore/httpchecksum.py @@ -32,10 +32,7 @@ MissingDependencyException, ) from botocore.response import StreamingBody -from botocore.utils import ( - conditionally_calculate_md5, - determine_content_length, -) +from botocore.utils import _has_checksum_header, determine_content_length if HAS_CRT: from awscrt import checksums as crt_checksums @@ -44,6 +41,8 @@ logger = logging.getLogger(__name__) +DEFAULT_CHECKSUM_ALGORITHM = "CRC32" + class BaseChecksum: _CHUNK_SIZE = 1024 * 1024 @@ -259,7 +258,18 @@ def resolve_request_checksum_algorithm( params, supported_algorithms=None, ): + # If the header is already set by the customer, skip calculation + if _has_checksum_header(request): + return + + request_checksum_calculation = request["context"][ + "client_config" + ].request_checksum_calculation http_checksum = operation_model.http_checksum + request_checksum_required = ( + operation_model.http_checksum_required + or http_checksum.get("requestChecksumRequired") + ) algorithm_member = http_checksum.get("requestAlgorithmMember") if algorithm_member and algorithm_member in params: # If the client has opted into using flexible checksums and the @@ -280,35 +290,30 @@ def resolve_request_checksum_algorithm( raise FlexibleChecksumError( error_msg=f"Unsupported checksum algorithm: {algorithm_name}" ) + elif request_checksum_required or ( + algorithm_member and request_checksum_calculation == "when_supported" + ): + algorithm_name = "crc32" + else: + return - location_type = "header" - if operation_model.has_streaming_input: - # Operations with streaming input must support trailers. - if request["url"].startswith("https:"): - # We only support unsigned trailer checksums currently. As this - # disables payload signing we'll only use trailers over TLS. - location_type = "trailer" - - algorithm = { - "algorithm": algorithm_name, - "in": location_type, - "name": f"x-amz-checksum-{algorithm_name}", - } - - if algorithm["name"] in request["headers"]: - # If the header is already set by the customer, skip calculation - return + location_type = "header" + if operation_model.has_streaming_input: + # Operations with streaming input must support trailers. + if request["url"].startswith("https:"): + # We only support unsigned trailer checksums currently. As this + # disables payload signing we'll only use trailers over TLS. + location_type = "trailer" + + algorithm = { + "algorithm": algorithm_name, + "in": location_type, + "name": f"x-amz-checksum-{algorithm_name}", + } - checksum_context = request["context"].get("checksum", {}) - checksum_context["request_algorithm"] = algorithm - request["context"]["checksum"] = checksum_context - elif operation_model.http_checksum_required or http_checksum.get( - "requestChecksumRequired" - ): - # Otherwise apply the old http checksum behavior via Content-MD5 - checksum_context = request["context"].get("checksum", {}) - checksum_context["request_algorithm"] = "conditional-md5" - request["context"]["checksum"] = checksum_context + checksum_context = request["context"].get("checksum", {}) + checksum_context["request_algorithm"] = algorithm + request["context"]["checksum"] = checksum_context def apply_request_checksum(request): @@ -318,10 +323,7 @@ def apply_request_checksum(request): if not algorithm: return - if algorithm == "conditional-md5": - # Special case to handle the http checksum required trait - conditionally_calculate_md5(request) - elif algorithm["in"] == "header": + if algorithm["in"] == "header": _apply_request_header_checksum(request) elif algorithm["in"] == "trailer": _apply_request_trailer_checksum(request) diff --git a/botocore/utils.py b/botocore/utils.py index 314d30516d..a14d4f1ea7 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -10,7 +10,6 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import base64 import binascii import datetime import email.message @@ -50,10 +49,8 @@ HAS_CRT, IPV4_RE, IPV6_ADDRZ_RE, - MD5_AVAILABLE, UNSAFE_URL_CHARS, OrderedDict, - get_md5, get_tzinfo_options, json, quote, @@ -3223,44 +3220,11 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'): return default -def calculate_md5(body, **kwargs): - if isinstance(body, (bytes, bytearray)): - binary_md5 = _calculate_md5_from_bytes(body) - else: - binary_md5 = _calculate_md5_from_file(body) - return base64.b64encode(binary_md5).decode('ascii') - - -def _calculate_md5_from_bytes(body_bytes): - md5 = get_md5(body_bytes) - return md5.digest() - - -def _calculate_md5_from_file(fileobj): - start_position = fileobj.tell() - md5 = get_md5() - for chunk in iter(lambda: fileobj.read(1024 * 1024), b''): - md5.update(chunk) - fileobj.seek(start_position) - return md5.digest() - - -def _is_s3express_request(params): - endpoint_properties = params.get('context', {}).get( - 'endpoint_properties', {} - ) - return endpoint_properties.get('backend') == 'S3Express' - - def _has_checksum_header(params): headers = params['headers'] - # If a user provided Content-MD5 is present, - # don't try to compute a new one. - if 'Content-MD5' in headers: - return True # If a header matching the x-amz-checksum-* pattern is present, we - # assume a checksum has already been provided and an md5 is not needed + # assume a checksum has already been provided by the user. for header in headers: if CHECKSUM_HEADER_PATTERN.match(header): return True @@ -3268,51 +3232,6 @@ def _has_checksum_header(params): return False -def conditionally_calculate_checksum(params, **kwargs): - if not _has_checksum_header(params): - conditionally_calculate_md5(params, **kwargs) - conditionally_enable_crc32(params, **kwargs) - - -def conditionally_enable_crc32(params, **kwargs): - checksum_context = params.get('context', {}).get('checksum', {}) - checksum_algorithm = checksum_context.get('request_algorithm') - if ( - _is_s3express_request(params) - and params['body'] is not None - and checksum_algorithm in (None, "conditional-md5") - ): - params['context']['checksum'] = { - 'request_algorithm': { - 'algorithm': 'crc32', - 'in': 'header', - 'name': 'x-amz-checksum-crc32', - } - } - - -def conditionally_calculate_md5(params, **kwargs): - """Only add a Content-MD5 if the system supports it.""" - body = params['body'] - checksum_context = params.get('context', {}).get('checksum', {}) - checksum_algorithm = checksum_context.get('request_algorithm') - if checksum_algorithm and checksum_algorithm != 'conditional-md5': - # Skip for requests that will have a flexible checksum applied - return - - if _has_checksum_header(params): - # Don't add a new header if one is already available. - return - - if _is_s3express_request(params): - # S3Express doesn't support MD5 - return - - if MD5_AVAILABLE and body is not None: - md5_digest = calculate_md5(body, **kwargs) - params['headers']['Content-MD5'] = md5_digest - - class FileWebIdentityTokenLoader: def __init__(self, web_identity_token_path, _open=open): self._web_identity_token_path = web_identity_token_path diff --git a/tests/functional/test_httpchecksum.py b/tests/functional/test_httpchecksum.py new file mode 100644 index 0000000000..a6c78556af --- /dev/null +++ b/tests/functional/test_httpchecksum.py @@ -0,0 +1,313 @@ +# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + + +import pytest + +from tests import ClientHTTPStubber, patch_load_service_model + +TEST_CHECKSUM_SERVICE_MODEL = { + "version": "2.0", + "documentation": "This is a test service.", + "metadata": { + "apiVersion": "2023-01-01", + "endpointPrefix": "test", + "protocol": "rest-json", + "jsonVersion": "1.1", + "serviceFullName": "Test Service", + "serviceId": "Test Service", + "signatureVersion": "v4", + "signingName": "testservice", + "uid": "testservice-2023-01-01", + }, + "operations": { + "HttpChecksumOperation": { + "name": "HttpChecksumOperation", + "http": {"method": "POST", "requestUri": "/HttpChecksumOperation"}, + "input": {"shape": "SomeInput"}, + "output": {"shape": "SomeOutput"}, + "httpChecksum": { + "requestChecksumRequired": True, + "requestAlgorithmMember": "checksumAlgorithm", + "requestValidationModeMember": "validationMode", + "responseAlgorithms": [ + "CRC32", + "CRC32C", + "CRC64NVME", + "SHA1", + "SHA256", + ], + }, + }, + "HttpChecksumStreamingOperation": { + "name": "HttpChecksumStreamingOperation", + "http": { + "method": "POST", + "requestUri": "/HttpChecksumStreamingOperation", + }, + "input": {"shape": "SomeStreamingInput"}, + "output": {"shape": "SomeStreamingOutput"}, + "httpChecksum": { + "requestChecksumRequired": True, + "requestAlgorithmMember": "checksumAlgorithm", + "requestValidationModeMember": "validationMode", + "responseAlgorithms": [ + "CRC32", + "CRC32C", + "CRC64NVME", + "SHA1", + "SHA256", + ], + }, + }, + }, + "shapes": { + "ChecksumAlgorithm": { + "type": "string", + "enum": {"CRC32", "CRC32C", "CRC64NVME", "SHA1", "SHA256"}, + "member": {"shape": "MockOpParam"}, + }, + "ValidationMode": {"type": "string", "enum": {"ENABLE"}}, + "String": {"type": "string"}, + "Blob": {"type": "blob"}, + "SomeStreamingOutput": { + "type": "structure", + }, + "SomeStreamingInput": { + "type": "structure", + "required": ["body"], + "members": { + "body": { + "shape": "Blob", + "streaming": True, + }, + "validationMode": { + "shape": "ValidationMode", + "location": "header", + "locationName": "x-amz-response-validation-mode", + }, + "checksumAlgorithm": { + "shape": "ChecksumAlgorithm", + "location": "header", + "locationName": "x-amz-request-algorithm", + }, + }, + "payload": "body", + }, + "SomeInput": { + "type": "structure", + "required": ["body"], + "members": { + "body": {"shape": "String"}, + "validationMode": { + "shape": "ValidationMode", + "location": "header", + "locationName": "x-amz-response-validation-mode", + }, + "checksumAlgorithm": { + "shape": "ChecksumAlgorithm", + "location": "header", + "locationName": "x-amz-request-algorithm", + }, + }, + "payload": "body", + }, + "SomeOutput": { + "type": "structure", + }, + }, +} + +TEST_CHECKSUM_RULESET = { + "version": "1.0", + "parameters": {}, + "rules": [ + { + "conditions": [], + "type": "endpoint", + "endpoint": { + "url": "https://foo.bar", + "properties": {}, + "headers": {}, + }, + } + ], +} + + +def _request_checksum_calculation_cases(): + request_payload = "Hello world" + return ( + ( + "CRC32", + request_payload, + { + "x-amz-request-algorithm": "CRC32", + "x-amz-checksum-crc32": "i9aeUg==", + }, + ), + ( + "CRC32C", + request_payload, + { + "x-amz-request-algorithm": "CRC32C", + "x-amz-checksum-crc32c": "crUfeA==", + }, + ), + ( + "CRC64NVME", + request_payload, + { + "x-amz-request-algorithm": "CRC64NVME", + "x-amz-checksum-crc64nvme": "OOJZ0D8xKts=", + }, + ), + ( + "SHA1", + request_payload, + { + "x-amz-request-algorithm": "SHA1", + "x-amz-checksum-sha1": "e1AsOh9IyGCa4hLN+2Od7jlnP14=", + }, + ), + ( + "SHA256", + request_payload, + { + "x-amz-request-algorithm": "SHA256", + "x-amz-checksum-sha256": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=", + }, + ), + ) + + +@pytest.mark.parametrize( + "checksum_algorithm, request_payload, expected_headers", + _request_checksum_calculation_cases(), +) +def test_request_checksum_calculation( + patched_session, + monkeypatch, + checksum_algorithm, + request_payload, + expected_headers, +): + patch_load_service_model( + patched_session, + monkeypatch, + TEST_CHECKSUM_SERVICE_MODEL, + TEST_CHECKSUM_RULESET, + ) + client = patched_session.create_client( + "testservice", + region_name="us-west-2", + ) + with ClientHTTPStubber(client, strict=True) as http_stubber: + http_stubber.add_response(status=200, body=b"") + client.http_checksum_operation( + body=request_payload, checksumAlgorithm=checksum_algorithm + ) + actual_headers = http_stubber.requests[0].headers + for key, val in expected_headers.items(): + assert key in actual_headers + assert actual_headers[key].decode() == val + + +def _streaming_request_checksum_calculation_cases(): + request_payload = "Hello world" + return ( + ( + "CRC32", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc32", + }, + {"x-amz-checksum-crc32": "i9aeUg=="}, + ), + ( + "CRC32C", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc32c", + }, + {"x-amz-checksum-crc32c": "crUfeA=="}, + ), + ( + "CRC64NVME", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-crc64nvme", + }, + {"x-amz-checksum-crc64nvme": "OOJZ0D8xKts="}, + ), + ( + "SHA1", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-sha1", + }, + {"x-amz-checksum-sha1": "e1AsOh9IyGCa4hLN+2Od7jlnP14="}, + ), + ( + "SHA256", + request_payload, + { + "content-encoding": "aws-chunked", + "x-amz-trailer": "x-amz-checksum-sha256", + }, + { + "x-amz-checksum-sha256": "ZOyIygCyaOW6GjVnihtTFtIS9PNmskdyMlNKiuyjfzw=" + }, + ), + ) + + +@pytest.mark.parametrize( + "checksum_algorithm, request_payload, expected_headers, expected_trailers", + _streaming_request_checksum_calculation_cases(), +) +def test_streaming_request_checksum_calculation( + patched_session, + monkeypatch, + checksum_algorithm, + request_payload, + expected_headers, + expected_trailers, +): + patch_load_service_model( + patched_session, + monkeypatch, + TEST_CHECKSUM_SERVICE_MODEL, + TEST_CHECKSUM_RULESET, + ) + client = patched_session.create_client( + "testservice", + region_name="us-west-2", + ) + with ClientHTTPStubber(client, strict=True) as http_stubber: + http_stubber.add_response(status=200, body=b"") + client.http_checksum_streaming_operation( + body=request_payload, checksumAlgorithm=checksum_algorithm + ) + request = http_stubber.requests[0] + actual_headers = request.headers + for key, val in expected_headers.items(): + assert key in actual_headers + assert actual_headers[key].decode() == val + read_body = request.body.read() + for key, val in expected_trailers.items(): + assert f"{key}:{val}".encode() in read_body diff --git a/tests/functional/test_s3.py b/tests/functional/test_s3.py index 04fc32aa7b..753e19332a 100644 --- a/tests/functional/test_s3.py +++ b/tests/functional/test_s3.py @@ -10,7 +10,6 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import base64 import datetime import re @@ -19,7 +18,7 @@ import botocore.session from botocore import UNSIGNED -from botocore.compat import get_md5, parse_qs, urlsplit +from botocore.compat import parse_qs, urlsplit from botocore.config import Config from botocore.exceptions import ( ClientError, @@ -28,6 +27,7 @@ UnsupportedS3AccesspointConfigurationError, UnsupportedS3ConfigurationError, ) +from botocore.httpchecksum import _CHECKSUM_CLS from botocore.parsers import ResponseParserError from tests import ( BaseSessionTest, @@ -1454,30 +1454,61 @@ def setUp(self): def get_sent_headers(self): return self.http_stubber.requests[0].headers - def test_content_md5_set(self): + def test_trailing_checksum_set(self): with self.http_stubber: self.client.put_object(Bucket="foo", Key="bar", Body="baz") - self.assertIn("content-md5", self.get_sent_headers()) + sent_headers = self.get_sent_headers() + self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked") + self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked") + self.assertEqual( + sent_headers["X-Amz-Trailer"], b"x-amz-checksum-crc32" + ) + self.assertEqual(sent_headers["X-Amz-Decoded-Content-Length"], b"3") + self.assertEqual( + sent_headers["x-amz-content-sha256"], + b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", + ) - def test_content_md5_set_empty_body(self): + def test_trailing_checksum_set_empty_body(self): with self.http_stubber: self.client.put_object(Bucket="foo", Key="bar", Body="") - self.assertIn("content-md5", self.get_sent_headers()) + sent_headers = self.get_sent_headers() + self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked") + self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked") + self.assertEqual( + sent_headers["X-Amz-Trailer"], b"x-amz-checksum-crc32" + ) + self.assertEqual(sent_headers["X-Amz-Decoded-Content-Length"], b"0") + self.assertEqual( + sent_headers["x-amz-content-sha256"], + b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", + ) - def test_content_md5_set_empty_file(self): + def test_trailing_checksum_set_empty_file(self): with self.http_stubber: with temporary_file("rb") as f: assert f.read() == b"" self.client.put_object(Bucket="foo", Key="bar", Body=f) - self.assertIn("content-md5", self.get_sent_headers()) + sent_headers = self.get_sent_headers() + self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked") + self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked") + self.assertEqual( + sent_headers["X-Amz-Trailer"], b"x-amz-checksum-crc32" + ) + self.assertEqual(sent_headers["X-Amz-Decoded-Content-Length"], b"0") + self.assertEqual( + sent_headers["x-amz-content-sha256"], + b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", + ) - def test_content_sha256_set_if_config_value_is_true(self): - # By default, put_object() does not include an x-amz-content-sha256 - # header because it also includes a `Content-MD5` header. The - # `payload_signing_enabled` config overrides this logic and forces the - # header. + def test_content_sha256_not_set_if_config_value_is_true(self): + # By default, put_object() provides a trailing checksum and includes the + # x-amz-content-sha256 header with a value of "STREAMING-UNSIGNED-PAYLOAD-TRAILER". + # We do not support payload signing for streaming so the `payload_signing_enabled` + # config has no effect here. config = Config( - signature_version="s3v4", s3={"payload_signing_enabled": True} + signature_version="s3v4", + s3={"payload_signing_enabled": True}, ) self.client = self.session.create_client( "s3", self.region, config=config @@ -1488,11 +1519,17 @@ def test_content_sha256_set_if_config_value_is_true(self): self.client.put_object(Bucket="foo", Key="bar", Body="baz") sent_headers = self.get_sent_headers() sha_header = sent_headers.get("x-amz-content-sha256") - self.assertNotEqual(sha_header, b"UNSIGNED-PAYLOAD") + self.assertIsNotNone(sha_header) + self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") def test_content_sha256_not_set_if_config_value_is_false(self): + # By default, put_object() provides a trailing checksum and includes the + # x-amz-content-sha256 header with a value of "STREAMING-UNSIGNED-PAYLOAD-TRAILER". + # We do not support payload signing for streaming so the `payload_signing_enabled` + # config has no effect here. config = Config( - signature_version="s3v4", s3={"payload_signing_enabled": False} + signature_version="s3v4", + s3={"payload_signing_enabled": False}, ) self.client = self.session.create_client( "s3", self.region, config=config @@ -1503,15 +1540,19 @@ def test_content_sha256_not_set_if_config_value_is_false(self): self.client.put_object(Bucket="foo", Key="bar", Body="baz") sent_headers = self.get_sent_headers() sha_header = sent_headers.get("x-amz-content-sha256") - self.assertEqual(sha_header, b"UNSIGNED-PAYLOAD") + self.assertIsNotNone(sha_header) + self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") - def test_content_sha256_set_if_config_value_not_set_put_object(self): - # The default behavior matches payload_signing_enabled=False. For - # operations where the `Content-MD5` is present this means that - # `x-amz-content-sha256` is present but not set. + def test_content_sha256_not_set_if_config_value_not_set_put_object(self): + # By default, put_object() provides a trailing checksum and includes the + # x-amz-content-sha256 header with a value of "STREAMING-UNSIGNED-PAYLOAD-TRAILER". + # We do not support payload signing for streaming so the `payload_signing_enabled` + # config has no effect here. config = Config(signature_version="s3v4") self.client = self.session.create_client( - "s3", self.region, config=config + "s3", + self.region, + config=config, ) self.http_stubber = ClientHTTPStubber(self.client) self.http_stubber.add_response() @@ -1519,7 +1560,8 @@ def test_content_sha256_set_if_config_value_not_set_put_object(self): self.client.put_object(Bucket="foo", Key="bar", Body="baz") sent_headers = self.get_sent_headers() sha_header = sent_headers.get("x-amz-content-sha256") - self.assertEqual(sha_header, b"UNSIGNED-PAYLOAD") + self.assertIsNotNone(sha_header) + self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") def test_content_sha256_set_if_config_value_not_set_list_objects(self): # The default behavior matches payload_signing_enabled=False. For @@ -1555,16 +1597,6 @@ def test_content_sha256_set_s3_on_outpost(self): sha_header = sent_headers.get("x-amz-content-sha256") self.assertNotEqual(sha_header, b"UNSIGNED-PAYLOAD") - def test_content_sha256_set_if_md5_is_unavailable(self): - with mock.patch("botocore.compat.MD5_AVAILABLE", False): - with mock.patch("botocore.utils.MD5_AVAILABLE", False): - with self.http_stubber: - self.client.put_object(Bucket="foo", Key="bar", Body="baz") - sent_headers = self.get_sent_headers() - unsigned = "UNSIGNED-PAYLOAD" - self.assertNotEqual(sent_headers["x-amz-content-sha256"], unsigned) - self.assertNotIn("content-md5", sent_headers) - class TestCanSendIntegerHeaders(BaseSessionTest): def test_int_values_with_sigv4(self): @@ -2200,7 +2232,7 @@ def test_checksums_included_in_expected_operations( stub.add_response() call = getattr(client, operation) call(**operation_kwargs) - assert "Content-MD5" in stub.requests[-1].headers + assert "x-amz-checksum-crc32" in stub.requests[-1].headers @pytest.mark.parametrize( @@ -3637,10 +3669,12 @@ def _verify_presigned_url_addressing( class TestS3XMLPayloadEscape(BaseS3OperationTest): - def assert_correct_content_md5(self, request): - content_md5_bytes = get_md5(request.body).digest() - content_md5 = base64.b64encode(content_md5_bytes) - self.assertEqual(content_md5, request.headers["Content-MD5"]) + def assert_correct_crc32_checksum(self, request): + checksum = _CHECKSUM_CLS.get("crc32")() + crc32_checksum = checksum.handle(request.body).encode() + self.assertEqual( + crc32_checksum, request.headers["x-amz-checksum-crc32"] + ) def test_escape_keys_in_xml_delete_objects(self): self.http_stubber.add_response() @@ -3652,7 +3686,7 @@ def test_escape_keys_in_xml_delete_objects(self): request = self.http_stubber.requests[0] self.assertNotIn(b"\r\n\r", request.body) self.assertIn(b" ", request.body) - self.assert_correct_content_md5(request) + self.assert_correct_crc32_checksum(request) def test_escape_keys_in_xml_put_bucket_lifecycle_configuration(self): self.http_stubber.add_response() @@ -3671,7 +3705,7 @@ def test_escape_keys_in_xml_put_bucket_lifecycle_configuration(self): request = self.http_stubber.requests[0] self.assertNotIn(b"my\r\n\rprefix", request.body) self.assertIn(b"my prefix", request.body) - self.assert_correct_content_md5(request) + self.assert_correct_crc32_checksum(request) class TestExpectContinueBehavior(BaseSessionTest): diff --git a/tests/unit/test_handlers.py b/tests/unit/test_handlers.py index 7e28356442..2164cb919d 100644 --- a/tests/unit/test_handlers.py +++ b/tests/unit/test_handlers.py @@ -43,7 +43,6 @@ ServiceModel, ) from botocore.signers import RequestSigner -from botocore.utils import conditionally_calculate_md5 from tests import BaseSessionTest, mock, unittest @@ -1313,164 +1312,6 @@ def test_copy_source_sse_params_as_str(self): self.assertEqual(params['CopySourceSSECustomerKeyMD5'], 'Zm9v') -class TestAddMD5(BaseMD5Test): - def get_context(self, s3_config=None): - if s3_config is None: - s3_config = {} - return {'client_config': Config(s3=s3_config)} - - def test_adds_md5_when_v4(self): - credentials = Credentials('key', 'secret') - request_signer = RequestSigner( - ServiceId('s3'), 'us-east-1', 's3', 'v4', credentials, mock.Mock() - ) - request_dict = { - 'body': b'bar', - 'url': 'https://s3.us-east-1.amazonaws.com', - 'method': 'PUT', - 'headers': {}, - } - context = self.get_context() - conditionally_calculate_md5( - request_dict, request_signer=request_signer, context=context - ) - self.assertTrue('Content-MD5' in request_dict['headers']) - - def test_adds_md5_when_s3v4(self): - credentials = Credentials('key', 'secret') - request_signer = RequestSigner( - ServiceId('s3'), - 'us-east-1', - 's3', - 's3v4', - credentials, - mock.Mock(), - ) - request_dict = { - 'body': b'bar', - 'url': 'https://s3.us-east-1.amazonaws.com', - 'method': 'PUT', - 'headers': {}, - } - context = self.get_context({'payload_signing_enabled': False}) - conditionally_calculate_md5( - request_dict, request_signer=request_signer, context=context - ) - self.assertTrue('Content-MD5' in request_dict['headers']) - - def test_conditional_does_not_add_when_md5_unavailable(self): - credentials = Credentials('key', 'secret') - request_signer = RequestSigner( - 's3', 'us-east-1', 's3', 's3', credentials, mock.Mock() - ) - request_dict = { - 'body': b'bar', - 'url': 'https://s3.us-east-1.amazonaws.com', - 'method': 'PUT', - 'headers': {}, - } - - context = self.get_context() - self.set_md5_available(False) - with mock.patch('botocore.utils.MD5_AVAILABLE', False): - conditionally_calculate_md5( - request_dict, request_signer=request_signer, context=context - ) - self.assertFalse('Content-MD5' in request_dict['headers']) - - def test_add_md5_raises_error_when_md5_unavailable(self): - credentials = Credentials('key', 'secret') - request_signer = RequestSigner( - ServiceId('s3'), 'us-east-1', 's3', 's3', credentials, mock.Mock() - ) - request_dict = { - 'body': b'bar', - 'url': 'https://s3.us-east-1.amazonaws.com', - 'method': 'PUT', - 'headers': {}, - } - - self.set_md5_available(False) - with self.assertRaises(MD5UnavailableError): - conditionally_calculate_md5( - request_dict, request_signer=request_signer - ) - - def test_adds_md5_when_s3v2(self): - credentials = Credentials('key', 'secret') - request_signer = RequestSigner( - ServiceId('s3'), 'us-east-1', 's3', 's3', credentials, mock.Mock() - ) - request_dict = { - 'body': b'bar', - 'url': 'https://s3.us-east-1.amazonaws.com', - 'method': 'PUT', - 'headers': {}, - } - context = self.get_context() - conditionally_calculate_md5( - request_dict, request_signer=request_signer, context=context - ) - self.assertTrue('Content-MD5' in request_dict['headers']) - - def test_add_md5_with_file_like_body(self): - request_dict = {'body': io.BytesIO(b'foobar'), 'headers': {}} - self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' - conditionally_calculate_md5(request_dict) - self.assertEqual( - request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' - ) - - def test_add_md5_with_bytes_object(self): - request_dict = {'body': b'foobar', 'headers': {}} - self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' - conditionally_calculate_md5(request_dict) - self.assertEqual( - request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' - ) - - def test_add_md5_with_empty_body(self): - request_dict = {'body': b'', 'headers': {}} - self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' - conditionally_calculate_md5(request_dict) - self.assertEqual( - request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' - ) - - def test_add_md5_with_bytearray_object(self): - request_dict = {'body': bytearray(b'foobar'), 'headers': {}} - self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' - conditionally_calculate_md5(request_dict) - self.assertEqual( - request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' - ) - - def test_skip_md5_when_flexible_checksum_context(self): - request_dict = { - 'body': io.BytesIO(b'foobar'), - 'headers': {}, - 'context': { - 'checksum': { - 'request_algorithm': { - 'in': 'header', - 'algorithm': 'crc32', - 'name': 'x-amz-checksum-crc32', - } - } - }, - } - conditionally_calculate_md5(request_dict) - self.assertNotIn('Content-MD5', request_dict['headers']) - - def test_skip_md5_when_flexible_checksum_explicit_header(self): - request_dict = { - 'body': io.BytesIO(b'foobar'), - 'headers': {'x-amz-checksum-crc32': 'foo'}, - } - conditionally_calculate_md5(request_dict) - self.assertNotIn('Content-MD5', request_dict['headers']) - - class TestParameterAlias(unittest.TestCase): def setUp(self): self.original_name = 'original' diff --git a/tests/unit/test_httpchecksum.py b/tests/unit/test_httpchecksum.py index 1e46fc3268..84f1d07998 100644 --- a/tests/unit/test_httpchecksum.py +++ b/tests/unit/test_httpchecksum.py @@ -15,6 +15,7 @@ from botocore.awsrequest import AWSResponse from botocore.compat import HAS_CRT +from botocore.config import Config from botocore.exceptions import ( AwsChunkedWrapperError, FlexibleChecksumError, @@ -89,7 +90,11 @@ def _build_request(self, body): request = { "headers": {}, "body": body, - "context": {}, + "context": { + "client_config": Config( + request_checksum_calculation="when_supported", + ) + }, "url": "https://example.com", } return request @@ -106,20 +111,26 @@ def test_request_checksum_algorithm_model_opt_in(self): http_checksum={"requestAlgorithmMember": "Algorithm"} ) - # Param is not present, no checksum will be set + # Param is not present, crc32 checksum will be set by default params = {} request = self._build_request(b"") resolve_request_checksum_algorithm(request, operation_model, params) - self.assertNotIn("checksum", request["context"]) + expected_algorithm = { + "algorithm": "crc32", + "in": "header", + "name": "x-amz-checksum-crc32", + } + actual_algorithm = request["context"]["checksum"]["request_algorithm"] + self.assertEqual(actual_algorithm, expected_algorithm) - # Param is present, crc32 checksum will be set - params = {"Algorithm": "crc32"} + # Param is present, sha256 checksum will be set + params = {"Algorithm": "sha256"} request = self._build_request(b"") resolve_request_checksum_algorithm(request, operation_model, params) expected_algorithm = { - "algorithm": "crc32", + "algorithm": "sha256", "in": "header", - "name": "x-amz-checksum-crc32", + "name": "x-amz-checksum-sha256", } actual_algorithm = request["context"]["checksum"]["request_algorithm"] self.assertEqual(actual_algorithm, expected_algorithm) @@ -138,14 +149,9 @@ def test_request_checksum_algorithm_model_opt_in_streaming(self): streaming_input=True, ) - # Param is not present, no checksum will be set + # Param is not present, crc32 checksum will be set params = {} resolve_request_checksum_algorithm(request, operation_model, params) - self.assertNotIn("checksum", request["context"]) - - # Param is present, crc32 checksum will be set in the trailer - params = {"Algorithm": "crc32"} - resolve_request_checksum_algorithm(request, operation_model, params) expected_algorithm = { "algorithm": "crc32", "in": "trailer", @@ -154,14 +160,25 @@ def test_request_checksum_algorithm_model_opt_in_streaming(self): actual_algorithm = request["context"]["checksum"]["request_algorithm"] self.assertEqual(actual_algorithm, expected_algorithm) + # Param is present, sha256 checksum will be set in the trailer + params = {"Algorithm": "sha256"} + resolve_request_checksum_algorithm(request, operation_model, params) + expected_algorithm = { + "algorithm": "sha256", + "in": "trailer", + "name": "x-amz-checksum-sha256", + } + actual_algorithm = request["context"]["checksum"]["request_algorithm"] + self.assertEqual(actual_algorithm, expected_algorithm) + # Trailer should not be used for http endpoints request = self._build_request(b"") request["url"] = "http://example.com" resolve_request_checksum_algorithm(request, operation_model, params) expected_algorithm = { - "algorithm": "crc32", + "algorithm": "sha256", "in": "header", - "name": "x-amz-checksum-crc32", + "name": "x-amz-checksum-sha256", } actual_algorithm = request["context"]["checksum"]["request_algorithm"] self.assertEqual(actual_algorithm, expected_algorithm) @@ -212,17 +229,21 @@ def test_request_checksum_algorithm_model_no_crt_crc64nvme_unsupported( str(context.exception), ) - def test_request_checksum_algorithm_model_legacy_md5(self): + def test_request_checksum_algorithm_model_legacy_crc32(self): request = self._build_request(b"") operation_model = self._make_operation_model(required=True) params = {} resolve_request_checksum_algorithm(request, operation_model, params) + expected_algorithm = { + "algorithm": "crc32", + "in": "header", + "name": "x-amz-checksum-crc32", + } actual_algorithm = request["context"]["checksum"]["request_algorithm"] - expected_algorithm = "conditional-md5" self.assertEqual(actual_algorithm, expected_algorithm) - def test_request_checksum_algorithm_model_new_md5(self): + def test_request_checksum_algorithm_model_new_crc32(self): request = self._build_request(b"") operation_model = self._make_operation_model( http_checksum={"requestChecksumRequired": True} @@ -231,7 +252,11 @@ def test_request_checksum_algorithm_model_new_md5(self): resolve_request_checksum_algorithm(request, operation_model, params) actual_algorithm = request["context"]["checksum"]["request_algorithm"] - expected_algorithm = "conditional-md5" + expected_algorithm = { + "algorithm": "crc32", + "in": "header", + "name": "x-amz-checksum-crc32", + } self.assertEqual(actual_algorithm, expected_algorithm) def test_apply_request_checksum_handles_no_checksum_context(self): @@ -239,7 +264,9 @@ def test_apply_request_checksum_handles_no_checksum_context(self): apply_request_checksum(request) # Build another request and assert the original request is the same expected_request = self._build_request(b"") - self.assertEqual(request, expected_request) + self.assertEqual(request["headers"], expected_request["headers"]) + self.assertEqual(request["body"], expected_request["body"]) + self.assertEqual(request["url"], expected_request["url"]) def test_apply_request_checksum_handles_invalid_context(self): request = self._build_request(b"") @@ -253,14 +280,6 @@ def test_apply_request_checksum_handles_invalid_context(self): with self.assertRaises(FlexibleChecksumError): apply_request_checksum(request) - def test_apply_request_checksum_conditional_md5(self): - request = self._build_request(b"") - request["context"]["checksum"] = { - "request_algorithm": "conditional-md5" - } - apply_request_checksum(request) - self.assertIn("Content-MD5", request["headers"]) - def test_apply_request_checksum_flex_header_bytes(self): request = self._build_request(b"") request["context"]["checksum"] = {