From ce3b6b57e1712714c81a1f751ff044a7562d300b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kasper=20Ramstr=C3=B6m?=
Date: Mon, 1 Jul 2024 17:25:17 +0200
Subject: [PATCH] Cap large exception messages (#1952)

---
 modal/_container_io_manager.py | 12 +++++++++++-
 test/container_test.py         | 19 +++++++++++++++++++
 test/supports/functions.py     |  6 ++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/modal/_container_io_manager.py b/modal/_container_io_manager.py
index a6c5c9bbb..42041c786 100644
--- a/modal/_container_io_manager.py
+++ b/modal/_container_io_manager.py
@@ -514,13 +514,23 @@ async def handle_input_exception(self, input_id, started_at: float) -> AsyncGene
             # serializing the exception, which may have some issues (there
             # was an earlier note about it that it might not be possible
             # to unpickle it in some cases). Let's watch out for issues.
+
+            repr_exc = repr(exc)
+            if len(repr_exc) >= MAX_OBJECT_SIZE_BYTES:
+                # We cap large exception messages to avoid
+                # unhandled exceptions causing infinite loops,
+                # and just send back a trimmed version
+                trimmed_bytes = len(repr_exc) - (MAX_OBJECT_SIZE_BYTES - 1000)
+                repr_exc = repr_exc[: MAX_OBJECT_SIZE_BYTES - 1000]
+                repr_exc = f"{repr_exc}...\nTrimmed {trimmed_bytes} bytes from original exception"
+
             await self._push_output(
                 input_id,
                 started_at=started_at,
                 data_format=api_pb2.DATA_FORMAT_PICKLE,
                 status=api_pb2.GenericResult.GENERIC_STATUS_FAILURE,
                 data=self.serialize_exception(exc),
-                exception=repr(exc),
+                exception=repr_exc,
                 traceback=traceback.format_exc(),
                 serialized_tb=serialized_tb,
                 tb_line_cache=tb_line_cache,
diff --git a/test/container_test.py b/test/container_test.py
index c27b11cd3..e4e86f42b 100644
--- a/test/container_test.py
+++ b/test/container_test.py
@@ -31,6 +31,7 @@
     serialize_data_format,
 )
 from modal._utils import async_utils
+from modal._utils.blob_utils import MAX_OBJECT_SIZE_BYTES
 from modal.app import _App
 from modal.exception import InvalidError
 from modal.partial_function import enter, method
@@ -1423,6 +1424,24 @@ async def custom_heartbeater(self):
     assert "Traceback" not in caplog.text  # should not print a full traceback - don't scare users!
 
 
+@skip_github_non_linux
+@pytest.mark.usefixtures("server_url_env")
+def test_container_doesnt_send_large_exceptions(servicer):
+    # Tests that large exception messages (>2mb) are trimmed
+    ret = _run_container(
+        servicer,
+        "test.supports.functions",
+        "raise_large_unicode_exception",
+        inputs=_get_inputs(((), {})),
+    )
+
+    assert len(ret.items) == 1
+    assert len(ret.items[0].SerializeToString()) < MAX_OBJECT_SIZE_BYTES * 1.5
+    assert ret.items[0].result.status == api_pb2.GenericResult.GENERIC_STATUS_FAILURE
+    assert "UnicodeDecodeError" in ret.items[0].result.exception
+    assert servicer.task_result is None  # should not cause a failure result
+
+
 @skip_github_non_linux
 @pytest.mark.usefixtures("server_url_env")
 def test_sigint_termination_input_concurrent(servicer):
diff --git a/test/supports/functions.py b/test/supports/functions.py
index 0ff3d6db8..8afd4f5ae 100644
--- a/test/supports/functions.py
+++ b/test/supports/functions.py
@@ -433,3 +433,9 @@ def sandbox_f(x):
 @app.function()
 def is_local_f(x):
     return is_local()
+
+
+@app.function()
+def raise_large_unicode_exception():
+    byte_str = (b"k" * 120_000_000) + b"\x99"
+    byte_str.decode("utf-8")