From d5c16a88398695f319f56fb23df417b9bdecbf95 Mon Sep 17 00:00:00 2001
From: irfan sharif
Date: Tue, 3 Sep 2024 12:59:25 +0000
Subject: [PATCH] Add proto definitions for GPU fallbacks

---
 modal/functions.py    | 13 ++++++++-----
 modal_proto/api.proto | 14 ++++++++++----
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/modal/functions.py b/modal/functions.py
index 9a00e635be..3d695f2e04 100644
--- a/modal/functions.py
+++ b/modal/functions.py
@@ -820,14 +820,17 @@ async def _load(self: _Function, resolver: Resolver, existing_object_id: Optiona
                 scheduler_placement=scheduler_placement.proto if scheduler_placement else None,
                 is_class=info.is_service_class(),
                 class_parameter_info=info.class_parameter_info(),
-                _experimental_resources=[
-                    convert_fn_config_to_resources_config(
-                        cpu=cpu, memory=memory, gpu=_experimental_gpu, ephemeral_disk=ephemeral_disk
+                i6pn_enabled=config.get("i6pn_enabled"),
+                _experimental_concurrent_cancellations=True,
+                _experimental_task_templates=[
+                    api_pb2.TaskTemplate(
+                        priority=1,
+                        resources=convert_fn_config_to_resources_config(
+                            cpu=cpu, memory=memory, gpu=_experimental_gpu, ephemeral_disk=ephemeral_disk
+                        ),
                     )
                     for _experimental_gpu in _experimental_gpus
                 ],
-                i6pn_enabled=config.get("i6pn_enabled"),
-                _experimental_concurrent_cancellations=True,
             )
             assert resolver.app_id
             request = api_pb2.FunctionCreateRequest(
diff --git a/modal_proto/api.proto b/modal_proto/api.proto
index fef96710ec..e6e85d3161 100644
--- a/modal_proto/api.proto
+++ b/modal_proto/api.proto
@@ -943,7 +943,6 @@ message FileEntry {
   uint64 size = 4;
 }
 
-
 message Function {
   string module_name = 1;
   string function_name = 2;
@@ -1045,8 +1044,7 @@ message Function {
 
   ClassParameterInfo class_parameter_info = 56;
 
-  repeated Resources _experimental_resources = 57; // overrides `resources` field above
-
+  reserved 57; // _experimental_resources
   reserved 58;
   reserved 59;
   uint32 batch_max_size = 60; // Maximum number of inputs to fetch at once
@@ -1054,6 +1052,9 @@
   bool i6pn_enabled = 62;
   bool _experimental_concurrent_cancellations = 63;
   uint32 max_concurrent_inputs = 64;
+
+  bool _experimental_task_templates_enabled = 65; // forces going through the new gpu-fallbacks integration path, even if no fallback options are specified
+  repeated TaskTemplate _experimental_task_templates = 66; // for fallback options, where the first/most-preferred "template" is derived from fields above
 }
 
 message FunctionBindParamsRequest {
@@ -1762,7 +1763,7 @@ message Sandbox {
   // to look at fine-grained placement constraints.
   reserved 16; // _experimental_scheduler
   optional SchedulerPlacement scheduler_placement = 17;
-  repeated Resources _experimental_resources = 18; // overrides `resources` field above
+  reserved 18; // _experimental_resources
   string worker_id = 19; // for internal debugging use only
 
   oneof open_ports_oneof {
@@ -2072,6 +2073,11 @@ message TaskStats {
   double started_at = 4;
 }
 
+message TaskTemplate {
+  uint32 priority = 1;
+  Resources resources = 2;
+  uint32 concurrent_inputs = 3;
+}
 
 message TokenFlowCreateRequest {
   string utm_source = 3;
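
A minimal sketch of how a caller could populate the new message once the server consumes `_experimental_task_templates`. It uses only the field names defined in this patch plus the generated `api_pb2` module; the `build_fallback_templates` helper and the "lower `priority` value = more preferred" ordering are assumptions for illustration, mirroring the single `priority=1` template built in `_load()` above rather than anything this commit implements.

    # Hypothetical helper, not in the repo: turn an ordered list of Resources
    # protos into TaskTemplates, most preferred first.
    from typing import List, Sequence

    from modal_proto import api_pb2

    def build_fallback_templates(resources_by_preference: Sequence[api_pb2.Resources]) -> List[api_pb2.TaskTemplate]:
        # resources_by_preference[0] is the primary resource shape (e.g. the
        # preferred GPU); later entries are progressively less preferred fallbacks.
        return [
            api_pb2.TaskTemplate(
                priority=rank,  # assumed convention: 1 = most preferred, as in _load() above
                resources=resources,
                concurrent_inputs=1,  # one concurrent input per task in this sketch
            )
            for rank, resources in enumerate(resources_by_preference, start=1)
        ]

Per the comment on field 65, setting `_experimental_task_templates_enabled` would force the new integration path even when this template list is empty.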