diff --git a/flytekit/core/python_auto_container.py b/flytekit/core/python_auto_container.py index 60a6522ce8..1d795120c3 100644 --- a/flytekit/core/python_auto_container.py +++ b/flytekit/core/python_auto_container.py @@ -16,7 +16,7 @@ from flytekit.core.tracker import TrackedInstance, extract_task_module from flytekit.core.utils import _get_container_definition, _serialize_pod_spec, timeit from flytekit.extras.accelerators import BaseAccelerator -from flytekit.image_spec.image_spec import ImageBuildEngine, ImageSpec +from flytekit.image_spec.image_spec import ImageBuildEngine, ImageSpec, _calculate_deduced_hash_from_image_spec from flytekit.loggers import logger from flytekit.models import task as _task_model from flytekit.models.security import Secret, SecurityContext @@ -276,7 +276,7 @@ def get_registerable_container_image(img: Optional[Union[str, ImageSpec]], cfg: :return: """ if isinstance(img, ImageSpec): - image = cfg.find_image(f"ft_{img.lhs}") + image = cfg.find_image(_calculate_deduced_hash_from_image_spec(img)) image_name = image.full if image else None if not image_name: ImageBuildEngine.build(img) diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index 83d67ae857..da52e63d13 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -23,7 +23,7 @@ @dataclass -class ImageSpec(TrackedInstance): +class ImageSpec: """ This class is used to specify the docker image that will be used to run the task. @@ -284,6 +284,21 @@ def _build_image(cls, builder, image_spec, img_name): cls._IMAGE_NAME_TO_REAL_NAME[img_name] = fully_qualified_image_name +@lru_cache +def _calculate_deduced_hash_from_image_spec(image_spec: ImageSpec): + """ + Calculate the hash from the image spec, + and it is used to identify the imageSpec in the ImageConfig in the serialization context. 
+ + ImageConfig: + - deduced hash 1: flyteorg/flytekit: 123 + - deduced hash 2: flyteorg/flytekit: 456 + """ + image_spec_bytes = asdict(image_spec).__str__().encode("utf-8") + # asdict() returns a new dict, so the image spec itself is never modified here; the result is the md5 digest of that dict's string form, url-safe base64-encoded with the "=" padding stripped. + return base64.urlsafe_b64encode(hashlib.md5(image_spec_bytes).digest()).decode("ascii").rstrip("=") + + @lru_cache def calculate_hash_from_image_spec(image_spec: ImageSpec): """ diff --git a/flytekit/tools/translator.py b/flytekit/tools/translator.py index 32dee5c5d9..6e8cd247c9 100644 --- a/flytekit/tools/translator.py +++ b/flytekit/tools/translator.py @@ -23,6 +23,7 @@ from flytekit.core.task import ReferenceTask from flytekit.core.utils import ClassDecorator, _dnsify from flytekit.core.workflow import ReferenceWorkflow, WorkflowBase +from flytekit.image_spec.image_spec import _calculate_deduced_hash_from_image_spec from flytekit.models import common as _common_models from flytekit.models import common as common_models from flytekit.models import interface as interface_models @@ -185,7 +186,7 @@ def get_serializable_task( if settings.image_config.images is None: settings.image_config = ImageConfig.create_from(settings.image_config.default_image) settings.image_config.images.append( - Image.look_up_image_info(f"ft_{e.container_image.lhs}", e.get_image(settings)) + Image.look_up_image_info(_calculate_deduced_hash_from_image_spec(e.container_image), e.get_image(settings)) ) # In case of Dynamic tasks, we want to pass the serialization context, so that they can reconstruct the state