diff --git a/botocore/handlers.py b/botocore/handlers.py index 0ecdb5cc51..3c86150781 100644 --- a/botocore/handlers.py +++ b/botocore/handlers.py @@ -52,7 +52,6 @@ ParamValidationError, UnsupportedTLSVersionWarning, ) -from botocore.httpchecksum import DEFAULT_CHECKSUM_ALGORITHM from botocore.regions import EndpointResolverBuiltins from botocore.signers import ( add_generate_db_auth_token, @@ -1270,15 +1269,6 @@ def _update_status_code(response, **kwargs): http_response.status_code = parsed_status_code -def set_default_multipart_checksum_algorithm(params, **kwargs): - """Sets the ``ChecksumAlgorithm`` parameter to the SDKs default checksum. - - The ``CreateMultipartUpload`` operation isn't modeled with the ``httpchecksum`` - trait and requires us to set a default checksum algorithm when none is provided. - """ - params.setdefault('ChecksumAlgorithm', DEFAULT_CHECKSUM_ALGORITHM) - - # This is a list of (event_name, handler). # When a Session is created, everything in this list will be # automatically registered with that Session. @@ -1333,10 +1323,6 @@ def set_default_multipart_checksum_algorithm(params, **kwargs): 'before-parameter-build.s3.CreateMultipartUpload', validate_ascii_metadata, ), - ( - 'before-parameter-build.s3.CreateMultipartUpload', - set_default_multipart_checksum_algorithm, - ), ('before-parameter-build.s3-control', remove_accid_host_prefix_from_model), ('docs.*.s3.CopyObject.complete-section', document_copy_source_form), ('docs.*.s3.UploadPartCopy.complete-section', document_copy_source_form), diff --git a/botocore/httpchecksum.py b/botocore/httpchecksum.py index 2678e414d9..a09fe03873 100644 --- a/botocore/httpchecksum.py +++ b/botocore/httpchecksum.py @@ -25,7 +25,7 @@ from binascii import crc32 from hashlib import sha1, sha256 -from botocore.compat import HAS_CRT +from botocore.compat import HAS_CRT, urlparse from botocore.exceptions import ( AwsChunkedWrapperError, FlexibleChecksumError, @@ -293,14 +293,14 @@ def resolve_request_checksum_algorithm( elif request_checksum_required or ( algorithm_member and request_checksum_calculation == "when_supported" ): - algorithm_name = "crc32" + algorithm_name = DEFAULT_CHECKSUM_ALGORITHM.lower() else: return location_type = "header" if operation_model.has_streaming_input: # Operations with streaming input must support trailers. - if request["url"].startswith("https:"): + if urlparse(request["url"]).scheme == "https": # We only support unsigned trailer checksums currently. As this # disables payload signing we'll only use trailers over TLS. location_type = "trailer" diff --git a/botocore/utils.py b/botocore/utils.py index a14d4f1ea7..a531972d63 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +import base64 import binascii import datetime import email.message @@ -49,8 +50,10 @@ HAS_CRT, IPV4_RE, IPV6_ADDRZ_RE, + MD5_AVAILABLE, UNSAFE_URL_CHARS, OrderedDict, + get_md5, get_tzinfo_options, json, quote, @@ -3220,6 +3223,38 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'): return default +def calculate_md5(body, **kwargs): + """This function has been deprecated, but is kept for backwards compatibility.""" + if isinstance(body, (bytes, bytearray)): + binary_md5 = _calculate_md5_from_bytes(body) + else: + binary_md5 = _calculate_md5_from_file(body) + return base64.b64encode(binary_md5).decode('ascii') + + +def _calculate_md5_from_bytes(body_bytes): + """This function has been deprecated, but is kept for backwards compatibility.""" + md5 = get_md5(body_bytes) + return md5.digest() + + +def _calculate_md5_from_file(fileobj): + """This function has been deprecated, but is kept for backwards compatibility.""" + start_position = fileobj.tell() + md5 = get_md5() + for chunk in iter(lambda: fileobj.read(1024 * 1024), b''): + md5.update(chunk) + fileobj.seek(start_position) + return md5.digest() + + +def _is_s3express_request(params): + endpoint_properties = params.get('context', {}).get( + 'endpoint_properties', {} + ) + return endpoint_properties.get('backend') == 'S3Express' + + def _has_checksum_header(params): headers = params['headers'] @@ -3232,6 +3267,57 @@ def _has_checksum_header(params): return False +def conditionally_calculate_checksum(params, **kwargs): + """This function has been deprecated, but is kept for backwards compatibility.""" + if not _has_checksum_header(params): + conditionally_calculate_md5(params, **kwargs) + conditionally_enable_crc32(params, **kwargs) + + +def conditionally_enable_crc32(params, **kwargs): + """This function has been deprecated, but is kept for backwards compatibility.""" + checksum_context = params.get('context', {}).get('checksum', {}) + checksum_algorithm = checksum_context.get('request_algorithm') + if ( + _is_s3express_request(params) + and params['body'] is not None + and checksum_algorithm in (None, "conditional-md5") + ): + params['context']['checksum'] = { + 'request_algorithm': { + 'algorithm': 'crc32', + 'in': 'header', + 'name': 'x-amz-checksum-crc32', + } + } + + +def conditionally_calculate_md5(params, **kwargs): + """ + This function has been deprecated, but is kept for backwards compatibility. + + Only add a Content-MD5 if the system supports it. + """ + body = params['body'] + checksum_context = params.get('context', {}).get('checksum', {}) + checksum_algorithm = checksum_context.get('request_algorithm') + if checksum_algorithm and checksum_algorithm != 'conditional-md5': + # Skip for requests that will have a flexible checksum applied + return + + if _has_checksum_header(params): + # Don't add a new header if one is already available. + return + + if _is_s3express_request(params): + # S3Express doesn't support MD5 + return + + if MD5_AVAILABLE and body is not None: + md5_digest = calculate_md5(body, **kwargs) + params['headers']['Content-MD5'] = md5_digest + + class FileWebIdentityTokenLoader: def __init__(self, web_identity_token_path, _open=open): self._web_identity_token_path = web_identity_token_path diff --git a/tests/functional/test_s3.py b/tests/functional/test_s3.py index 6fe298277b..e98e96594f 100644 --- a/tests/functional/test_s3.py +++ b/tests/functional/test_s3.py @@ -27,7 +27,7 @@ UnsupportedS3AccesspointConfigurationError, UnsupportedS3ConfigurationError, ) -from botocore.httpchecksum import _CHECKSUM_CLS +from botocore.httpchecksum import HAS_CRT, Crc32Checksum, CrtCrc32Checksum from tests import ( BaseSessionTest, ClientHTTPStubber, @@ -1533,6 +1533,8 @@ def test_trailing_checksum_set(self): sent_headers["x-amz-content-sha256"], b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", ) + body = self.http_stubber.requests[0].body.read() + self.assertIn(b"x-amz-checksum-crc32", body) def test_trailing_checksum_set_empty_body(self): with self.http_stubber: @@ -1548,12 +1550,15 @@ def test_trailing_checksum_set_empty_body(self): sent_headers["x-amz-content-sha256"], b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", ) + body = self.http_stubber.requests[0].body.read() + self.assertIn(b"x-amz-checksum-crc32", body) def test_trailing_checksum_set_empty_file(self): with self.http_stubber: with temporary_file("rb") as f: assert f.read() == b"" self.client.put_object(Bucket="foo", Key="bar", Body=f) + body = self.http_stubber.requests[0].body.read() sent_headers = self.get_sent_headers() self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked") self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked") @@ -1565,6 +1570,7 @@ def test_trailing_checksum_set_empty_file(self): sent_headers["x-amz-content-sha256"], b"STREAMING-UNSIGNED-PAYLOAD-TRAILER", ) + self.assertIn(b"x-amz-checksum-crc32", body) def test_content_sha256_not_set_if_config_value_is_true(self): # By default, put_object() provides a trailing checksum and includes the @@ -1584,7 +1590,6 @@ def test_content_sha256_not_set_if_config_value_is_true(self): self.client.put_object(Bucket="foo", Key="bar", Body="baz") sent_headers = self.get_sent_headers() sha_header = sent_headers.get("x-amz-content-sha256") - self.assertIsNotNone(sha_header) self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") def test_content_sha256_not_set_if_config_value_is_false(self): @@ -1605,7 +1610,6 @@ def test_content_sha256_not_set_if_config_value_is_false(self): self.client.put_object(Bucket="foo", Key="bar", Body="baz") sent_headers = self.get_sent_headers() sha_header = sent_headers.get("x-amz-content-sha256") - self.assertIsNotNone(sha_header) self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") def test_content_sha256_not_set_if_config_value_not_set_put_object(self): @@ -1625,7 +1629,6 @@ def test_content_sha256_not_set_if_config_value_not_set_put_object(self): self.client.put_object(Bucket="foo", Key="bar", Body="baz") sent_headers = self.get_sent_headers() sha_header = sent_headers.get("x-amz-content-sha256") - self.assertIsNotNone(sha_header) self.assertEqual(sha_header, b"STREAMING-UNSIGNED-PAYLOAD-TRAILER") def test_content_sha256_set_if_config_value_not_set_list_objects(self): @@ -3735,7 +3738,7 @@ def _verify_presigned_url_addressing( class TestS3XMLPayloadEscape(BaseS3OperationTest): def assert_correct_crc32_checksum(self, request): - checksum = _CHECKSUM_CLS.get("crc32")() + checksum = CrtCrc32Checksum() if HAS_CRT else Crc32Checksum() crc32_checksum = checksum.handle(request.body).encode() self.assertEqual( crc32_checksum, request.headers["x-amz-checksum-crc32"] diff --git a/tests/unit/test_handlers.py b/tests/unit/test_handlers.py index 54725d3e2e..924abed8af 100644 --- a/tests/unit/test_handlers.py +++ b/tests/unit/test_handlers.py @@ -43,6 +43,7 @@ ServiceModel, ) from botocore.signers import RequestSigner +from botocore.utils import conditionally_calculate_md5 from tests import BaseSessionTest, mock, unittest @@ -1126,16 +1127,6 @@ def test_no_payload_signing_disabled_does_not_set_key(self): ) self.assertNotIn('payload_signing_enabled', context) - def test_set_default_multipart_checksum_algorithm(self): - params = {} - handlers.set_default_multipart_checksum_algorithm(params) - self.assertEqual(params["ChecksumAlgorithm"], "CRC32") - - def test_does_not_set_default_multipart_checksum_algorithm(self): - params = {"ChecksumAlgorithm": "SHA256"} - handlers.set_default_multipart_checksum_algorithm(params) - self.assertEqual(params["ChecksumAlgorithm"], "SHA256") - @pytest.mark.parametrize( 'auth_type, expected_response', [('v4', 's3v4'), ('v4a', 's3v4a')] @@ -1322,6 +1313,164 @@ def test_copy_source_sse_params_as_str(self): self.assertEqual(params['CopySourceSSECustomerKeyMD5'], 'Zm9v') +class TestAddMD5(BaseMD5Test): + def get_context(self, s3_config=None): + if s3_config is None: + s3_config = {} + return {'client_config': Config(s3=s3_config)} + + def test_adds_md5_when_v4(self): + credentials = Credentials('key', 'secret') + request_signer = RequestSigner( + ServiceId('s3'), 'us-east-1', 's3', 'v4', credentials, mock.Mock() + ) + request_dict = { + 'body': b'bar', + 'url': 'https://s3.us-east-1.amazonaws.com', + 'method': 'PUT', + 'headers': {}, + } + context = self.get_context() + conditionally_calculate_md5( + request_dict, request_signer=request_signer, context=context + ) + self.assertTrue('Content-MD5' in request_dict['headers']) + + def test_adds_md5_when_s3v4(self): + credentials = Credentials('key', 'secret') + request_signer = RequestSigner( + ServiceId('s3'), + 'us-east-1', + 's3', + 's3v4', + credentials, + mock.Mock(), + ) + request_dict = { + 'body': b'bar', + 'url': 'https://s3.us-east-1.amazonaws.com', + 'method': 'PUT', + 'headers': {}, + } + context = self.get_context({'payload_signing_enabled': False}) + conditionally_calculate_md5( + request_dict, request_signer=request_signer, context=context + ) + self.assertTrue('Content-MD5' in request_dict['headers']) + + def test_conditional_does_not_add_when_md5_unavailable(self): + credentials = Credentials('key', 'secret') + request_signer = RequestSigner( + 's3', 'us-east-1', 's3', 's3', credentials, mock.Mock() + ) + request_dict = { + 'body': b'bar', + 'url': 'https://s3.us-east-1.amazonaws.com', + 'method': 'PUT', + 'headers': {}, + } + + context = self.get_context() + self.set_md5_available(False) + with mock.patch('botocore.utils.MD5_AVAILABLE', False): + conditionally_calculate_md5( + request_dict, request_signer=request_signer, context=context + ) + self.assertFalse('Content-MD5' in request_dict['headers']) + + def test_add_md5_raises_error_when_md5_unavailable(self): + credentials = Credentials('key', 'secret') + request_signer = RequestSigner( + ServiceId('s3'), 'us-east-1', 's3', 's3', credentials, mock.Mock() + ) + request_dict = { + 'body': b'bar', + 'url': 'https://s3.us-east-1.amazonaws.com', + 'method': 'PUT', + 'headers': {}, + } + + self.set_md5_available(False) + with self.assertRaises(MD5UnavailableError): + conditionally_calculate_md5( + request_dict, request_signer=request_signer + ) + + def test_adds_md5_when_s3v2(self): + credentials = Credentials('key', 'secret') + request_signer = RequestSigner( + ServiceId('s3'), 'us-east-1', 's3', 's3', credentials, mock.Mock() + ) + request_dict = { + 'body': b'bar', + 'url': 'https://s3.us-east-1.amazonaws.com', + 'method': 'PUT', + 'headers': {}, + } + context = self.get_context() + conditionally_calculate_md5( + request_dict, request_signer=request_signer, context=context + ) + self.assertTrue('Content-MD5' in request_dict['headers']) + + def test_add_md5_with_file_like_body(self): + request_dict = {'body': io.BytesIO(b'foobar'), 'headers': {}} + self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' + conditionally_calculate_md5(request_dict) + self.assertEqual( + request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' + ) + + def test_add_md5_with_bytes_object(self): + request_dict = {'body': b'foobar', 'headers': {}} + self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' + conditionally_calculate_md5(request_dict) + self.assertEqual( + request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' + ) + + def test_add_md5_with_empty_body(self): + request_dict = {'body': b'', 'headers': {}} + self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' + conditionally_calculate_md5(request_dict) + self.assertEqual( + request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' + ) + + def test_add_md5_with_bytearray_object(self): + request_dict = {'body': bytearray(b'foobar'), 'headers': {}} + self.md5_digest.return_value = b'8X\xf6"0\xac<\x91_0\x0cfC\x12\xc6?' + conditionally_calculate_md5(request_dict) + self.assertEqual( + request_dict['headers']['Content-MD5'], 'OFj2IjCsPJFfMAxmQxLGPw==' + ) + + def test_skip_md5_when_flexible_checksum_context(self): + request_dict = { + 'body': io.BytesIO(b'foobar'), + 'headers': {}, + 'context': { + 'checksum': { + 'request_algorithm': { + 'in': 'header', + 'algorithm': 'crc32', + 'name': 'x-amz-checksum-crc32', + } + } + }, + } + conditionally_calculate_md5(request_dict) + self.assertNotIn('Content-MD5', request_dict['headers']) + + def test_skip_md5_when_flexible_checksum_explicit_header(self): + request_dict = { + 'body': io.BytesIO(b'foobar'), + 'headers': {'x-amz-checksum-crc32': 'foo'}, + } + conditionally_calculate_md5(request_dict) + self.assertNotIn('Content-MD5', request_dict['headers']) + + class TestParameterAlias(unittest.TestCase): def setUp(self): self.original_name = 'original'