From 7b1337140c44f3fbc0c48edaa677f4c8ecca9dad Mon Sep 17 00:00:00 2001 From: junpeiz Date: Mon, 16 Sep 2024 13:36:32 -0700 Subject: [PATCH] 8.0 Release (#2342) --- coremlpython/CoreMLPython.h | 3 +- coremlpython/CoreMLPython.mm | 44 +- coremltools/__init__.py | 34 +- coremltools/_deps/__init__.py | 26 +- coremltools/converters/mil/frontend/_utils.py | 27 +- .../mil/frontend/tensorflow/test/test_ops.py | 46 +- .../tensorflow2/test/test_v2_ops_tf_keras.py | 5 - .../mil/frontend/torch/converter.py | 113 +- .../mil/frontend/torch/exir_utils.py | 15 +- .../mil/frontend/torch/internal_graph.py | 46 +- .../converters/mil/frontend/torch/load.py | 43 +- .../converters/mil/frontend/torch/ops.py | 1302 ++++++--- .../mil/frontend/torch/quantization_ops.py | 273 +- .../torch/test/test_internal_graph.py | 3 +- .../torch/test/test_torch_conversion_api.py | 114 + .../test/test_torch_export_conversion_api.py | 270 +- .../test/test_torch_export_quantization.py | 31 +- .../mil/frontend/torch/test/test_torch_ops.py | 2602 ++++++++++------- .../torch/test/test_torch_quantization_ops.py | 465 ++- .../torch/test/test_torch_stateful_model.py | 37 +- .../mil/frontend/torch/test/testing_utils.py | 65 +- .../converters/mil/frontend/torch/utils.py | 29 +- coremltools/converters/mil/mil/operation.py | 4 +- .../mil/mil/ops/defs/iOS18/compression.py | 4 +- .../mil/ops/tests/iOS16/test_constexpr_ops.py | 31 + .../defs/cleanup/const_deduplication.py | 45 +- .../defs/optimize_elementwise_binary.py | 5 +- .../mil/passes/defs/optimize_quantization.py | 112 + .../mil/mil/passes/defs/quantization.py | 6 + .../mil/mil/passes/pass_pipeline.py | 2 + .../mil/passes/tests/test_cleanup_passes.py | 132 + .../passes/tests/test_quantization_passes.py | 112 + coremltools/converters/mil/testing_utils.py | 53 +- coremltools/models/_compiled_model.py | 39 +- coremltools/models/model.py | 62 +- coremltools/optimize/__init__.py | 4 +- .../optimize/coreml/_quantization_passes.py | 2 + .../_post_training_quantization.py | 44 +- .../torch/quantization/_backend_config.py | 39 +- .../optimize/torch/quantization/_utils.py | 17 +- .../optimize/torch/quantization/quantizer.py | 3 +- coremltools/test/api/test_api_visibilities.py | 2 + .../test/ml_program/test_compression.py | 28 +- .../test/modelpackage/test_modelpackage.py | 44 +- .../neural_network/test_compiled_model.py | 31 +- .../test/neural_network/test_tf_numeric.py | 5 - .../test/optimize/api/test_optimize_api.py | 40 + .../coreml/test_post_training_quantization.py | 122 +- .../torch/quantization/test_configure.py | 10 +- .../torch/quantization/test_quantizer.py | 15 +- .../optimize/torch/quantization/test_utils.py | 10 +- coremltools/version.py | 2 +- docs-guides/source/flexible-inputs.md | 12 + docs-guides/source/model-prediction.md | 11 + reqs/test.pip | 13 +- 55 files changed, 4797 insertions(+), 1857 deletions(-) diff --git a/coremlpython/CoreMLPython.h b/coremlpython/CoreMLPython.h index 6bd6554f5..a1735842d 100644 --- a/coremlpython/CoreMLPython.h +++ b/coremlpython/CoreMLPython.h @@ -57,7 +57,7 @@ namespace CoreML { Model(const Model&) = delete; Model& operator=(const Model&) = delete; ~Model(); - explicit Model(const std::string& urlStr, const std::string& computeUnits, const std::string& functionName); + explicit Model(const std::string& urlStr, const std::string& computeUnits, const std::string& functionName, const py::dict& optimizationHints); explicit Model(MLModel* m_model, NSURL* compiledUrl, bool deleteCompiledModelOnExit); py::list batchPredict(const py::list& batch) const; 
@@ -67,6 +67,7 @@ namespace CoreML { py::dict predict(const py::dict& input, State* state=NULL) const; #if BUILT_WITH_MACOS15_SDK + static void setOptimizationHints(MLModelConfiguration *configuration, const py::dict& optimizationHints); State newState() const; #endif diff --git a/coremlpython/CoreMLPython.mm b/coremlpython/CoreMLPython.mm index f818f4985..0bd060d4a 100644 --- a/coremlpython/CoreMLPython.mm +++ b/coremlpython/CoreMLPython.mm @@ -42,7 +42,12 @@ bool usingMacOS13OrHigher() { } } -Model::Model(const std::string& urlStr, const std::string& computeUnits, const std::string& functionName) { +Model::Model( + const std::string& urlStr, + const std::string& computeUnits, + const std::string& functionName, + const py::dict& optimizationHints + ) { @autoreleasepool { NSError *error = nil; @@ -80,6 +85,10 @@ bool usingMacOS13OrHigher() { MLModelConfiguration *configuration = [MLModelConfiguration new]; setComputeUnit(configuration, computeUnits); +#if BUILT_WITH_MACOS15_SDK + setOptimizationHints(configuration, optimizationHints); +#endif + if (!functionName.empty()) { #if BUILT_WITH_MACOS15_SDK configuration.functionName = [NSString stringWithUTF8String:functionName.c_str()]; @@ -148,6 +157,37 @@ bool usingMacOS13OrHigher() { } +#if BUILT_WITH_MACOS15_SDK +void Model::setOptimizationHints(MLModelConfiguration *configuration, const py::dict& optimizationHints) { + // This function does minimal validation. It assumes Python layer has already validated. + + // Reshape frequency optimization hint + if (optimizationHints.contains("reshapeFrequency")) { + const std::string val = optimizationHints["reshapeFrequency"].cast(); + if (val == "Frequent") { + configuration.optimizationHints.reshapeFrequency = MLReshapeFrequencyHintFrequent; + } else { + assert(val == "Infrequent"); + configuration.optimizationHints.reshapeFrequency = MLReshapeFrequencyHintInfrequent; + } + } + + // Specialization strategy optimization hint + if (optimizationHints.contains("specializationStrategy")) { + const std::string val = optimizationHints["specializationStrategy"].cast(); + if (val == "Default") { + configuration.optimizationHints.specializationStrategy = MLSpecializationStrategyDefault; + } else { + assert(val == "FastPrediction"); + configuration.optimizationHints.specializationStrategy = MLSpecializationStrategyFastPrediction; + } + } + + +} +#endif + + py::list Model::batchPredict(const py::list& batch) const { @autoreleasepool { NSError* error = nil; @@ -237,7 +277,7 @@ bool usingMacOS13OrHigher() { py::module m("libcoremlpython", "CoreML.Framework Python bindings"); py::class_(m, "_MLModelProxy") - .def(py::init()) + .def(py::init()) .def("predict", &Model::predict) .def("batchPredict", &Model::batchPredict) .def("get_compiled_model_path", &Model::getCompiledModelPath) diff --git a/coremltools/__init__.py b/coremltools/__init__.py index db16e8bf6..4af9f558d 100644 --- a/coremltools/__init__.py +++ b/coremltools/__init__.py @@ -72,11 +72,35 @@ class ComputeUnit(_Enum): ''' The set of processing-unit configurations the model can use to make predictions. ''' - ALL = 1 # Allows the model to use all compute units available, including the neural engine - CPU_AND_GPU = 2 # Allows the model to use both the CPU and GPU, but not the neural engine - CPU_ONLY = 3 # Limit the model to only use the CPU - CPU_AND_NE = 4 # Allows the model to use both the CPU and neural engine, but not the GPU. - # Only available on macOS >= 13.0 + ALL = 1 # Allows model to use all compute units available, including the neural engine. 
+ CPU_AND_GPU = 2 # Allows model to use both the CPU and GPU, but not the neural engine. + CPU_ONLY = 3 # Limits model to only use the CPU. + CPU_AND_NE = 4 # Allows model to use both the CPU and neural engine, but not the GPU. + # Only available on macOS >= 13.0 + + +class ReshapeFrequency(_Enum): + ''' + https://developer.apple.com/documentation/coreml/mlreshapefrequencyhint?language=objc + ''' + Frequent = 1 + Infrequent = 2 + + +class SpecializationStrategy(_Enum): + ''' + The optimization strategy for the model specialization. + + https://developer.apple.com/documentation/coreml/mlspecializationstrategy?language=objc + ''' + + # The strategy that works well for most applications. + Default = 1 + + # Prefer the prediction latency at the potential cost of specialization time, memory footprint, + # and the disk space usage of specialized artifacts. + FastPrediction = 2 + # A dictionary that maps the CoreML model specification version to the MLProgram/MIL opset string _OPSET = { diff --git a/coremltools/_deps/__init__.py b/coremltools/_deps/__init__.py index f8f0f28db..8e9d0b8ec 100644 --- a/coremltools/_deps/__init__.py +++ b/coremltools/_deps/__init__.py @@ -153,18 +153,33 @@ def __get_sklearn_version(version): # --------------------------------------------------------------------------------------- _HAS_TORCH = True -_TORCH_MAX_VERSION = "2.3.0" +_TORCH_MAX_VERSION = "2.4.0" _HAS_TORCH_EXPORT_API = False +_CT_OPTIMIZE_TORCH_MIN_VERSION = "2.1.0" +_IMPORT_CT_OPTIMIZE_TORCH = False try: import torch _warn_if_above_max_supported_version("Torch", torch.__version__, _TORCH_MAX_VERSION) - if _get_version(torch.__version__) >= Version("2.1.0"): + torch_version = _get_version(torch.__version__) + + if torch_version >= Version("2.1.0"): _HAS_TORCH_EXPORT_API = True + if torch_version >= Version(_CT_OPTIMIZE_TORCH_MIN_VERSION): + _IMPORT_CT_OPTIMIZE_TORCH = True + else: + logger.warning( + ( + f"Minimum required torch version for importing coremltools.optimize.torch is {_CT_OPTIMIZE_TORCH_MIN_VERSION}. " + f"Got torch version {torch_version}." + ) + ) + except: _HAS_TORCH = False MSG_TORCH_NOT_FOUND = "PyTorch not found." +MSG_TORCH_EXPORT_API_NOT_FOUND = "Torch.Export API not found." _HAS_TORCH_VISION = True @@ -189,6 +204,13 @@ def __get_sklearn_version(version): _HAS_EXECUTORCH = False MSG_EXECUTORCH_NOT_FOUND = "Executorch not found." +_HAS_TORCHAO = True +try: + import torchao +except: + _HAS_TORCHAO = False +MSG_TORCHAO_NOT_FOUND = "Torchao not found." 
+ # --------------------------------------------------------------------------------------- try: import scipy diff --git a/coremltools/converters/mil/frontend/_utils.py b/coremltools/converters/mil/frontend/_utils.py index 4da82cee3..6ed160392 100644 --- a/coremltools/converters/mil/frontend/_utils.py +++ b/coremltools/converters/mil/frontend/_utils.py @@ -513,7 +513,13 @@ def _concat_dims(dims, none_if_empty=False): def _decompose_scaled_dot_product_attention( - q: Var, k: Var, v: Var, mask: Var, name: str, before_op: Optional[Operation] = None + q: Var, + k: Var, + v: Var, + mask: Var, + name: str, + scale: Optional[Var] = None, + before_op: Optional[Operation] = None, ) -> Var: # scale the query input embed_size = q.shape[-1] @@ -524,9 +530,12 @@ def _decompose_scaled_dot_product_attention( ) q, k, v = promote_input_dtypes([q, k, v]) - multiplicative_scale_factor = 1 / math.sqrt(embed_size) - if types.builtin_to_string(q.dtype) == "fp16": - multiplicative_scale_factor = np.float16(multiplicative_scale_factor) + if scale is None: + multiplicative_scale_factor = 1 / math.sqrt(embed_size) + if types.builtin_to_string(q.dtype) == "fp16": + multiplicative_scale_factor = np.float16(multiplicative_scale_factor) + else: + multiplicative_scale_factor = scale q = mb.mul(x=q, y=multiplicative_scale_factor, before_op=before_op) # multiply query and key input tensors @@ -583,6 +592,11 @@ def _construct_constexpr_dequant_op( scale = np.squeeze(scale) if isinstance(zero_point, (np.ndarray, np.generic)): zero_point = np.squeeze(zero_point) + if len(scale.shape) > 1 or len(zero_point.shape) > 1: + raise ValueError( + "The more fine-grained quantization (such as blockwise) is only supported since iOS18." + "Please set minimum_deployment_target to iOS18 for using it." + ) kwargs = { "quantized_data": quantized_weights, @@ -631,7 +645,10 @@ def _construct_constexpr_dequant_op( } if zero_point is not None and np.any(zero_point): # Only pass the offset parameter when not all elements in `zero_point` are zeroes. - zero_point = zero_point.reshape(scale.shape).astype(quantized_weights.dtype) + zero_point = zero_point.reshape(scale.shape) + # When zero_point is integer, it's required to have the same dtype as the quantized weight. + if np.issubdtype(zero_point.dtype, np.integer): + zero_point = zero_point.astype(quantized_weights.dtype) kwargs["offset"] = zero_point if name is not None: kwargs["name"] = name diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py index f100ac901..d5351a8bc 100644 --- a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py @@ -2622,15 +2622,6 @@ def test_ios17_resize_bilinear_dynamic_shape( target_shape, align_corners, ): - if ( - backend == ("mlprogram", "fp16") - and input_shape == (2, 5, 2, 3) - and target_shape == (20, 60) - ): - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) - """ Since iOS17, dynamic shape is supported by lowering to `resize` MIL op. """ @@ -2732,15 +2723,6 @@ def test_ios17_resize_nearest_neighbor_dynamic_shape( input_shape, target_shape, ): - if ( - backend == ("mlprogram", "fp16") - and input_shape == (2, 5, 2, 3) - and target_shape == (20, 60) - ): - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) - """ Since iOS17, dynamic shape is supported by lowering to `resize` MIL op. 
""" @@ -5706,10 +5688,8 @@ def test_sort(self, compute_unit, backend, rank, dynamic): """ tf.sort dispatches to tf.math.top_k, and k = size of the axis to be sorted """ - if backend[0] == "mlprogram" and dynamic: - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) + if platform.machine() == "x86_64" and dynamic: + pytest.xfail("rdar://135843153 ([Bug] Models failed on x86_64 platform)") # Here we test the conversion of tf.sort(x, axis=0) # If dynamic, we prepend None to x shape as the dynamic shape axis @@ -6720,7 +6700,6 @@ def build_model(x): def test_programmatic( self, compute_unit, backend, input_block_rank, dynamic_input, dynamic_paddings ): - input_rank, block_rank = input_block_rank # generate data @@ -6733,6 +6712,9 @@ def test_programmatic( if block_shape[0] == 1: pytest.skip("neuralnetwork backend doesn't support unity block shape.") + if input_block_rank == (4, 1) and dynamic_input and not dynamic_paddings: + pytest.xfail("rdar://133558007 shape deduction failure") + paddings = [] for i in range(block_rank): while True: @@ -6832,14 +6814,12 @@ def test_programmatic( self, compute_unit, backend, input_block_rank, dynamic_input, dynamic_crops ): if ( - backend == ("mlprogram", "fp16") - and input_block_rank == (3, 1) or (3,2) + platform.machine() == "x86_64" + and input_block_rank == (3, 1) and dynamic_input and not dynamic_crops ): - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) + pytest.xfail("rdar://135843153 ([Bug] Models failed on x86_64 platform)") input_rank, block_rank = input_block_rank @@ -6939,16 +6919,6 @@ def test_smoke_new_op( input_shape, block_shape, crops = shape_block_crops crops = np.array(crops, dtype=np.int32) - if ( - backend == ("mlprogram", "fp16") - and shape_block_crops == [(4, 4, 6, 1), [1, 2], [[2, 1], [3, 3]]] - and dynamic_input - and not dynamic_crops - ): - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) - # The neuralnetwork backend doesn't support these tests if backend[0] == "neuralnetwork": return diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py index 20ab10d01..ec41a251c 100644 --- a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py +++ b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py @@ -1389,11 +1389,6 @@ def test_lstm_time_distributed_dense(self, compute_unit, backend): "compute_unit, backend", itertools.product(compute_units, backends) ) def test_lstm_dynamic_batch(self, compute_unit, backend): - if backend == ("mlprogram", "fp16"): - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) - input_shape = (1, 1280) inp = tf.keras.layers.Input(shape=input_shape) out, hn, cn = tf.keras.layers.LSTM(512, diff --git a/coremltools/converters/mil/frontend/torch/converter.py b/coremltools/converters/mil/frontend/torch/converter.py index de94511d3..b5cd8277e 100644 --- a/coremltools/converters/mil/frontend/torch/converter.py +++ b/coremltools/converters/mil/frontend/torch/converter.py @@ -29,6 +29,7 @@ from coremltools.optimize.coreml import _utils as optimize_utils from coremltools.optimize.coreml._quantization_passes import prune_weights +from .exir_utils import WRAPPED_SCALAR_INPUT_SUFFIX from .internal_graph import InternalTorchIRGraph, InternalTorchIRNode from .ops 
import convert_nodes from .quantization_ops import _dequantized_weight @@ -41,7 +42,13 @@ remove_getattr_nodes, transform_inplace_ops, ) -from .utils import NUM_TO_NUMPY_DTYPE, TORCH_DTYPE_TO_MIL_DTYPE, TORCH_DTYPE_TO_NUM, TorchFrontend +from .utils import ( + NUM_TO_NUMPY_DTYPE, + TORCH_DTYPE_TO_MIL_DTYPE, + TORCH_DTYPE_TO_NUM, + TORCH_EXPORT_BASED_FRONTENDS, + TorchFrontend, +) if _HAS_TORCH_EXPORT_API: from torch.export import ExportedProgram @@ -329,8 +336,8 @@ def prepare_for_conversion(self, node: InternalTorchIRNode) -> None: state feeds into only one ``read_state`` op. """ - # EXIR has nothing to prepare - if self.frontend == TorchFrontend.EXIR: + # Only torch script needs to prepare + if self.frontend != TorchFrontend.TORCHSCRIPT: return for val in node.inputs: @@ -431,7 +438,7 @@ def process_inplace_op(self, node: InternalTorchIRNode) -> None: } """ - assert self.frontend != TorchFrontend.EXIR, "EXIR has no in-place op" + assert self.frontend == TorchFrontend.TORCHSCRIPT, "Only torch script has no in-place op" if len(node.inputs) == 0: return @@ -475,7 +482,11 @@ def __getitem__(self, torch_name: str) -> Var: def __contains__(self, torch_name): """Returns whether or not the torch var exist in context.""" - return torch_name in self._current_graph[-1] + for idx in reversed(range(len(self._current_graph))): + current_graph = self._current_graph[idx] + if torch_name in current_graph: + return True + return False def push(self, inputs=None): """ @@ -594,10 +605,19 @@ def __init__( p(self.graph) elif _HAS_TORCH_EXPORT_API and isinstance(loaded_model, ExportedProgram): - self.context = TranscriptionContext(frontend=TorchFrontend.EXIR) + if loaded_model.dialect == "ATEN": + frontend = TorchFrontend.TORCHEXPORT + elif loaded_model.dialect == "EDGE": + frontend = TorchFrontend.EXECUTORCH + else: + raise NotImplementedError( + "Conversion for models with only ATEN or EDGE dialect is supported/tested. " + f"Provided Dialect: {loaded_model.dialect}" + ) + self.context = TranscriptionContext(frontend=frontend) self.graph = InternalTorchIRGraph.from_exir(exir=loaded_model) # For iOS 18+, create states for all mutable buffers - if self.opset_version >= _target.iOS18: + if self.opset_version is not None and self.opset_version >= _target.iOS18: self.states = [] for name, tensor in self.graph.buffers.items(): dtype = NUM_TO_NUMPY_DTYPE[TORCH_DTYPE_TO_NUM[tensor.dtype]] @@ -640,6 +660,18 @@ def __init__( self.param_to_compression_info = self._construct_compression_info( state_dict() if callable(state_dict) else state_dict ) + if self.context.frontend in TORCH_EXPORT_BASED_FRONTENDS: + # For EXIR, all param names are lifted as input names (in the format of `argx_x`), so we need to + # change names accordingly to make sure the compression info could be found later. 
+ for ( + arg_name, + param_name, + ) in loaded_model.graph_signature.inputs_to_parameters.items(): + if param_name in self.param_to_compression_info: + self.param_to_compression_info[arg_name] = self.param_to_compression_info[ + param_name + ] + del self.param_to_compression_info[param_name] def _validate_states(self) -> None: """ @@ -780,7 +812,7 @@ def _construct_compression_info( """ compression_info = dict() for torch_key_name in state_dict.keys(): - if torch_key_name == f"{_COMPRESSION_INFO_PREFIX}/metadata_version": + if f"{_COMPRESSION_INFO_PREFIX}/metadata_version" in torch_key_name: # TODO: rdar://124707382 ([Compression] Support versioning in CompressionInfo) continue @@ -1189,15 +1221,15 @@ def convert_const(self) -> None: ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=scope_name), ): self._add_const(name, val) - elif self.context.frontend == TorchFrontend.EXIR: - # ExecuTorch has constants lifted as inputs, yet we have not sorted out + elif self.context.frontend in TORCH_EXPORT_BASED_FRONTENDS: + # Torch.Export has constants lifted as inputs, yet we have not sorted out # how to support IO metadata, so for now just put a dummy metadata # since inputs/constants will not contribute to debugging/profiling # TODO (rdar://125572392): Support torch.export IO metadata - with mb.scope( - ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=[None]), - ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None]), - ): + scopes = [ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=[None])] + if self.context.frontend == TorchFrontend.EXECUTORCH: + scopes.append(ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None])) + with mb.scope(*scopes): self._add_const(name, val) else: raise ValueError(f"Invalid PyTorch frontend {self.context.frontend}") @@ -1249,7 +1281,7 @@ def convert(self) -> Program: ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=torch_name), ): input_var = mb.cast(x=input_var, dtype="fp32") - elif self.context.frontend == TorchFrontend.EXIR: + elif self.context.frontend in TORCH_EXPORT_BASED_FRONTENDS: # EXIR has dtypes all determined, so for now we just stick to EXIR dtypes # TODO (rdar://115845792): Handle fp16 IO dtypes # When handle user provided IO dtypes, we will also need to handle IO metadata @@ -1261,21 +1293,41 @@ def convert(self) -> Program: raise ValueError( "To use fp16 input, please set minimum deployment target to iOS16+" ) + # Torch.export may produce scalar input, + # which then gets wrapped as rank-1 size-1 tensor for Core ML residency + # during our internal graph construction. 
+ # Here we squeeze it back to scalar + if torch_name.endswith(WRAPPED_SCALAR_INPUT_SUFFIX): + torch_name = torch_name[: -len(WRAPPED_SCALAR_INPUT_SUFFIX)] + scopes = [ + ScopeInfo( + source=ScopeSource.EXIR_STACK_TRACE, + data=f"unwrap_scalar_input_{torch_name}", + ) + ] + if self.context.frontend == TorchFrontend.EXECUTORCH: + scopes.append( + ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None]) + ) + with mb.scope(*scopes): + input_var = mb.squeeze(x=input_var, name=torch_name) else: raise ValueError(f"Invalid PyTorch frontend {self.context.frontend}") self.context.add(input_var, torch_name=torch_name) # EXIR lifts buffer references as inputs, so we need to create them by reading states - if self.context.frontend == TorchFrontend.EXIR: + if self.context.frontend in TORCH_EXPORT_BASED_FRONTENDS: for ( input_name, buffer_name, ) in self.context.torch_graph.input_name_to_source_buffer_name.items(): buffer_var = self.context[buffer_name] - with mb.scope( - ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=f"read_{buffer_name}"), - ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None]), - ): + scopes = [ + ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=f"read_{buffer_name}") + ] + if self.context.frontend == TorchFrontend.EXECUTORCH: + scopes.append(ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None])) + with mb.scope(*scopes): input_var = mb.read_state(input=buffer_var) # As of iOS 18, Core ML state can only be fp16 # In torch converter, we convert everything under fp32 @@ -1295,17 +1347,19 @@ def convert(self) -> Program: # EXIR represents stateful execution as buffer mutation at output, # i.e. buffer.copy_(...) at the end of EXIR program, # so analogously we update state at the end of pymil function - if self.context.frontend == TorchFrontend.EXIR: + if self.context.frontend in TORCH_EXPORT_BASED_FRONTENDS: for ( output_name, buffer_name, ) in self.context.torch_graph.output_name_to_target_buffer_name.items(): output_var = self.context[output_name] buffer_var = self.context[buffer_name] - with mb.scope( - ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=f"write_{buffer_name}"), - ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None]), - ): + scopes = [ + ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=f"write_{buffer_name}") + ] + if self.context.frontend == TorchFrontend.EXECUTORCH: + scopes.append(ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None])) + with mb.scope(*scopes): cast_value = mb.cast( x=output_var, dtype=builtin_to_string(buffer_var.dtype) ) @@ -1350,11 +1404,10 @@ def convert(self) -> Program: ScopeSource.TORCHSCRIPT_MODULE_NAME, ScopeSource.TORCHSCRIPT_MODULE_TYPE, ] - elif self.context.frontend == TorchFrontend.EXIR: - essential_scope_sources = [ - ScopeSource.EXIR_STACK_TRACE, - ScopeSource.EXIR_DEBUG_HANDLE, - ] + elif self.context.frontend in TORCH_EXPORT_BASED_FRONTENDS: + essential_scope_sources = [ScopeSource.EXIR_STACK_TRACE] + if self.context.frontend == TorchFrontend.EXECUTORCH: + essential_scope_sources.append(ScopeSource.EXIR_DEBUG_HANDLE) else: raise ValueError(f"Invalid PyTorch frontend {self.context.frontend}") prog._add_essential_scope_source(essential_scope_sources) diff --git a/coremltools/converters/mil/frontend/torch/exir_utils.py b/coremltools/converters/mil/frontend/torch/exir_utils.py index 810e5ac57..7dd1d0999 100644 --- a/coremltools/converters/mil/frontend/torch/exir_utils.py +++ b/coremltools/converters/mil/frontend/torch/exir_utils.py @@ -15,6 +15,8 @@ from .utils import TORCH_DTYPE_TO_MIL_DTYPE 
+WRAPPED_SCALAR_INPUT_SUFFIX = "_wrapped_as_tensor_for_coreml" + def _map_sympy_number_to_int(sympy_number: sympy.core.numbers.Number) -> int: MAX_DIM = 2147483647 @@ -25,7 +27,6 @@ def _map_sympy_number_to_int(sympy_number: sympy.core.numbers.Number) -> int: def _construct_ct_range_dim_from_torch_value_ranges( - symbol_name: str, value_ranges, # torch.utils._sympy.value_ranges.ValueRanges ) -> RangeDim: if value_ranges.is_bool: @@ -33,7 +34,7 @@ def _construct_ct_range_dim_from_torch_value_ranges( lower = _map_sympy_number_to_int(value_ranges.lower) upper = _map_sympy_number_to_int(value_ranges.upper) - return RangeDim(lower_bound=lower, upper_bound=upper, symbol=symbol_name) + return RangeDim(lower_bound=lower, upper_bound=upper) def _construct_symbol_name_to_ct_range_dim_dict( @@ -43,7 +44,7 @@ def _construct_symbol_name_to_ct_range_dim_dict( for symbol, value_ranges in exported_program.range_constraints.items(): symbol_name = str(symbol) symbol_name_to_ct_range_dim[symbol_name] = _construct_ct_range_dim_from_torch_value_ranges( - symbol_name, value_ranges + value_ranges ) return symbol_name_to_ct_range_dim @@ -69,6 +70,14 @@ def _construct_ct_tensor_type_from_torch( else: shape.append(int(size)) + if len(shape) == 0: + shape = [1] + logger.warning( + "Core ML does not support scalar input, " + f"so {name} has been wrapped as rank-1 size-1 tensor" + ) + name = name + WRAPPED_SCALAR_INPUT_SUFFIX + return TensorType(name=name, dtype=coreml_dtype, shape=shape) diff --git a/coremltools/converters/mil/frontend/torch/internal_graph.py b/coremltools/converters/mil/frontend/torch/internal_graph.py index ea121f71a..129ecd2a8 100644 --- a/coremltools/converters/mil/frontend/torch/internal_graph.py +++ b/coremltools/converters/mil/frontend/torch/internal_graph.py @@ -13,21 +13,25 @@ from coremltools.converters.mil.input_types import TensorType from .exir_utils import extract_io_from_exir_program +from .torch_op_registry import _TORCH_OPS_REGISTRY from .torchscript_utils import _expand_and_optimize_ir from .utils import TORCH_DTYPE_TO_NUM, sanitize_op_kind -def _make_ssa_name(name: str) -> str: +def _make_ssa_name(name: Optional[Union[str, int]]) -> str: """ Converts a symbol name (string) into an SSA name, by prepending '%'. + If the name is a parameter value (int), directly printing it without prepending '%'. Only used for pretty printing the graph. """ if name is None: return "None" + if type(name) is int: + return str(name) return "%" + name -def _ssa_name_list(names: List[str]) -> List[str]: +def _ssa_name_list(names: List[Optional[Union[str, int]]]) -> List[str]: """ Take a list of symbol names (strings) and return them as SSA names. Only used for pretty printing the graph. @@ -161,6 +165,7 @@ def __init__( kind: str, inputs: List[str], outputs: List[str], + kwinputs: Optional[Dict[str, str]] = None, name: Optional[str] = None, parent: Optional[Union["InternalTorchIRGraph", "InternalTorchIRBlock"]] = None, attr: Optional[Dict[str, Any]] = None, @@ -174,6 +179,7 @@ def __init__( kind: the kind (op) of the node. inputs: list of input symbols. outputs: list of output symbols. + kwinputs: dict of keyword input symbols. parent: The InternalTorchIRGraph/Block this node belongs to. attr: dict of named attributes. blocks: list of InternalTorchIRBlock. 
@@ -188,6 +194,7 @@ def __init__( self.kind = kind self.inputs = inputs self.outputs = outputs + self.kwinputs = kwinputs self.parent = parent self.attr = attr if attr is not None else {"value": None} self.blocks = blocks if blocks is not None else [] @@ -233,14 +240,14 @@ def from_torchscript_node(cls, node, parent): @classmethod def from_exir_node(cls, node): - def get_arguments(alist): + def get_arguments(alist: List) -> Tuple: args = [] for i in alist: if isinstance(i, torch.fx.Node): args.append(i.name) elif isinstance(i, torch.fx.immutable_collections.immutable_list): args.append(get_arguments(i)) - elif isinstance(i, (int, float)): + elif isinstance(i, (int, float, str)): args.append(i) # This is necessitated by backward compatibility: # * TorchScript used to store dtype as integers/enums @@ -251,17 +258,21 @@ def get_arguments(alist): # to leverage the existing TorchScript converter infra elif isinstance(i, torch.dtype): args.append(TORCH_DTYPE_TO_NUM[i]) + elif ( + isinstance(i, torch.device) + or isinstance(i, torch.layout) + or isinstance(i, torch.memory_format) + ): + # PyMIL graph does not care about these things + pass elif i is None: args.append(None) else: - raise AssertionError(f"Unhandled type of the node: {type(i)}") + raise AssertionError( + f"Unhandled node type {type(i)}. Node content is: {str(i)}" + ) return tuple(args) - # TODO (rdar://128768037) handle kwargs - inputs = get_arguments(node.args) - # TODO: rdar://115846125 ([Executorch] Handle Models/Layers with Multiple outputs) - outputs = [node.name] - try: kind = node.target.name() except: @@ -270,6 +281,20 @@ def get_arguments(alist): else: kind = str(node.target) kind = sanitize_op_kind(kind) + if not kind in _TORCH_OPS_REGISTRY: + raise ValueError(f"Unsupported fx node {str(node)}, kind {kind}") + + # TODO (rdar://134015126) handle kwargs + inputs = get_arguments(node.args) + # TODO: rdar://115846125 ([Executorch] Handle Models/Layers with Multiple outputs) + outputs = [node.name] + + kwinputs = {} + for keyword, arg in node.kwargs.items(): + if arg is not None: + kwinputs[keyword] = get_arguments([arg]) + if len(kwinputs) == 0: + kwinputs = None name = node.name return cls( @@ -277,6 +302,7 @@ def get_arguments(alist): kind=kind, inputs=inputs, outputs=outputs, + kwinputs=kwinputs, parent=None, attr=None, blocks=None, diff --git a/coremltools/converters/mil/frontend/torch/load.py b/coremltools/converters/mil/frontend/torch/load.py index eda38fe28..e84877a67 100644 --- a/coremltools/converters/mil/frontend/torch/load.py +++ b/coremltools/converters/mil/frontend/torch/load.py @@ -10,16 +10,20 @@ from torch.jit._script import RecursiveScriptModule from coremltools import _logger as logger -from coremltools._deps import _HAS_TORCH_EXPORT_API +from coremltools._deps import _HAS_EXECUTORCH, _HAS_TORCH_EXPORT_API from coremltools.converters.mil.frontend.torch.converter import TorchConverter from coremltools.converters.mil.input_types import StateType, TensorType from coremltools.converters.mil.mil.program import Program from .converter import TorchConverter +from .utils import TorchFrontend if _HAS_TORCH_EXPORT_API: from torch.export import ExportedProgram +if _HAS_EXECUTORCH: + import executorch.exir + def load( spec: Union[RecursiveScriptModule, "ExportedProgram", str], @@ -108,16 +112,43 @@ def _torchscript_from_spec(model_spec: Union[str, RecursiveScriptModule]) -> Rec elif isinstance(model_spec, _torch.jit.ScriptModule): return model_spec - elif _HAS_TORCH_EXPORT_API and isinstance(model_spec, ExportedProgram): 
- return model_spec + else: raise TypeError( - "A PyTorch model must either be a .pt or .pth file, or a TorchScript object. Received: {}".format( - type(model_spec) - ) + "A PyTorch model must either be a .pt or .pth file, or a TorchScript object. " + f"Received: {type(model_spec)}" ) +if _HAS_TORCH_EXPORT_API: + + def _torchexport_from_spec( + model_spec: Union[str, ExportedProgram], + frontend=TorchFrontend.TORCHEXPORT, + ) -> ExportedProgram: + # Load torch.export serialization + if isinstance(model_spec, str) and model_spec.endswith(".pt2"): + filename = _os_path.abspath(model_spec) + try: + model = _torch.export.load(filename) + except Exception as e: + logger.error( + "\n\nERROR - Could not load the PyTorch model. Got the following error:\n" + ) + raise e + elif isinstance(model_spec, ExportedProgram): + model = model_spec + else: + raise TypeError( + "A PyTorch model must either be a .pt2 file, or an ExportedProgram object. " + f"Received: {type(model_spec)}" + ) + # To edge if edge dialect is desired + if frontend == TorchFrontend.EXECUTORCH and model.dialect != "EDGE": + model = executorch.exir.to_edge(model).exported_program() + return model + + def _perform_torch_convert(converter: TorchConverter, debug: bool) -> Program: try: prog = converter.convert() diff --git a/coremltools/converters/mil/frontend/torch/ops.py b/coremltools/converters/mil/frontend/torch/ops.py index 1047bfc4d..e38bc0473 100644 --- a/coremltools/converters/mil/frontend/torch/ops.py +++ b/coremltools/converters/mil/frontend/torch/ops.py @@ -8,7 +8,7 @@ import numbers import re from collections.abc import Iterable -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union import numpy as _np import numpy as np @@ -42,6 +42,7 @@ NUM_TO_TORCH_DTYPE, NUMPY_DTYPE_TO_TORCH_NUM, TORCH_DTYPE_TO_NUM, + TORCH_EXPORT_BASED_FRONTENDS, TYPE_TO_DTYPE_STRING, TorchFrontend, dtype_to_32bit, @@ -56,6 +57,22 @@ VALUE_CLOSE_TO_INFINITY = 1e+38 +TORCH_STRING_ARGS = { + # conv padding + "same", + "valid", + + # meshgrid indexing + "ij", + "xy", + + # pad mode + "circular", + "constant", + "reflect", + "replicate", +} + def _all_outputs_present(context, graph): """ @@ -127,11 +144,16 @@ def convert_single_node(context: TranscriptionContext, node: InternalTorchIRNode ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=scope_type), ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=scope_name), ] - elif context.frontend == TorchFrontend.EXIR: + elif context.frontend in TORCH_EXPORT_BASED_FRONTENDS: scopes = [ - ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=[node.meta.get("stack_trace")]), - ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[node.meta.get("debug_handle")]), + ScopeInfo(source=ScopeSource.EXIR_STACK_TRACE, data=[node.meta.get("stack_trace")]) ] + if context.frontend == TorchFrontend.EXECUTORCH: + scopes.append( + ScopeInfo( + source=ScopeSource.EXIR_DEBUG_HANDLE, data=[node.meta.get("debug_handle")] + ) + ) else: raise ValueError(f"Invalid PyTorch frontend {context.frontend}") @@ -180,6 +202,40 @@ def _assert_torch_dtype_num_is_not_complex_number(num): "This op does not support complex number dtype." +def _get_bindings(context, alist) -> List[Var]: + """ + This utility is needed in order to handle following cases: + With EXIR, + - Some of the inputs can be literals (like axis, perms) and thus can be of types: list, int etc. 
+ - An Input Parameter of an op could be a list/tuple similar to our concat layer + """ + results = [] + + for i in alist: + if isinstance(i, str): + if i in context: + results.append(context[i]) + elif i in TORCH_STRING_ARGS: + results.append(i) + else: + raise ValueError( + f"Binding {i} is neither a name of exisitng var in context, " + "nor a torch string argument" + ) + elif isinstance(i, (list, tuple)) and all(isinstance(j, int) for j in i): + results.append(mb.const(val=i)) + elif isinstance(i, (list, tuple)): + results.append(_get_bindings(context, i)) + elif isinstance(i, (int, float)): + results.append(mb.const(val=i)) + elif i is None: + results.append(None) + else: + raise NotImplementedError(f"Binding of inputs of type {type(i)} not handled yet") + + return results + + def _get_inputs( context, node, @@ -192,49 +248,21 @@ def _get_inputs( value of @expected. """ - def get_bindings(alist) -> List[Any]: - """ - This utility is needed in order to handle following cases: - With EXIR, - - Some of the inputs can be literals (like axis, perms) and thus can be of types: list, int etc. - - An Input Parameter of an op could be a list/tuple similar to our concat layer - """ - results = [] - - for i in alist: - if isinstance(i, str): - results.append(context[i]) - elif isinstance(i, (list, tuple)) and all(isinstance(j, int) for j in i): - results.append(mb.const(val=i)) - elif isinstance(i, (list, tuple)): - results.append(get_bindings(i)) - elif isinstance(i, (int, float)): - results.append(mb.const(val=i)) - elif i is None: - results.append(None) - else: - raise NotImplementedError(f"Binding of inputs of type {type(i)} not handled yet") - - return results - def check_if_number_of_inputs_expected(num_inputs: int, expected: Union[int, List, Tuple]) -> None: expected = [expected] if isinstance(expected, int) else expected if num_inputs not in expected: raise ValueError( - "node {} ({}) got {} input(s), expected {}".format( - node.name, node.kind, num_inputs, expected - ) + f"node {node.name} ({node.kind}) got {num_inputs} input(s), expected {expected}" ) def check_if_number_of_inputs_more_than_min_expected(num_inputs: int, min_expected: int) -> None: if num_inputs < min_expected: raise ValueError( - "node {} ({}) got {} input(s), expected minimum {} inputs".format( - node.name, node.kind, num_inputs, min_expected - ) + f"node {node.name} ({node.kind}) got {num_inputs} input(s), " + f"expected minimum {min_expected} inputs" ) - inputs = get_bindings(node.inputs) + inputs = _get_bindings(context, node.inputs) if expected is not None: if isinstance(expected, dict): @@ -253,6 +281,17 @@ def check_if_number_of_inputs_more_than_min_expected(num_inputs: int, min_expect return inputs +def _get_kwinputs(context, node, keyword: str, default: Optional[List[Var]] = None) -> List[Var]: + if node.kwinputs is None: + return default + else: + bindings = node.kwinputs.get(keyword) + if bindings is None: + return default + else: + return _get_bindings(context, bindings) + + def _list_select(shape_var, index): """ Sometimes we need to select a specific item from a list. 
If that item @@ -337,7 +376,7 @@ def _construct_constant(val, name): @register_torch_op def native_dropout(context, node): - if context.frontend == TorchFrontend.EXIR: + if context.frontend in TORCH_EXPORT_BASED_FRONTENDS: inputs = _get_inputs(context, node, min_expected=2) context.add((inputs[0],), node.name) else: @@ -825,7 +864,7 @@ def gt(context, node): context.add(greater) -@register_torch_op(torch_alias=["t", "numpy_t", "transpose.int"]) +@register_torch_op(torch_alias=["t", "numpy_t"]) def transpose(context, node): assert len(node.outputs) == 1 inputs = _get_inputs(context, node) @@ -976,76 +1015,203 @@ def linear(context, node): context.add(res, torch_name=node.name) -@register_torch_op(torch_alias=["conv2d", "convolution"]) +@register_torch_op( + torch_alias=[ + "convolution", + "conv1d", + "conv2d", + "conv3d", + "conv1d.padding", + "conv2d.padding", + "conv3d.padding", + "conv_transpose1d", + "conv_transpose2d.input", + "conv_transpose3d.input", + ] +) def _convolution(context, node): - inputs = _get_inputs(context, node) - - x = inputs[0] - # PyTorch and MIL has same weight layout - # Conv: [Cout, Cin, *D] - # ConvTranspose: [Cin, Cout, *D] - weight = inputs[1] - bias = inputs[2] - strides = inputs[3] - - x, weight = promote_input_dtypes([x, weight]) - - # Expand padding. Torch accepts either an int (for all dimensions) or an n-tuple of ints (one per dimension), but - # we require a (2 * n)-tuple, where n is the number of spatial dimensions, start and end for each spatial dimension - pad = inputs[4].val - - if len(weight.shape) in (3, 4): - # 1D and 2D: Need to explicitly state L-R, T-B pad - pad = _np.repeat(pad, 2) - elif len(weight.shape) == 5: - # 3D: Need to explicitly state F-Bk, L-R, T-B pad - if type(pad) == int: - pad = _np.repeat(pad, 6) - elif len(pad) == 3: - pad = _np.repeat(pad, 2) - else: - raise ValueError( - "Invalid weight dimension. Must be 3, 4, or 5 for 1D, 2D, or 3D convolution, respectively." 
+ default_torch_padding = "valid" if node.kind.endswith(".padding") else 0 + + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs( + context, + node, + min_expected={ + TorchFrontend.TORCHSCRIPT: 7, + TorchFrontend.TORCHEXPORT: 2, + TorchFrontend.EXECUTORCH: 2, + }, ) + nargs = len(inputs) - dilations = inputs[5] - out_pad = None - if len(inputs) >= 9: - transposed = inputs[6].val - out_pad = inputs[7].val - group = inputs[8] - elif len(inputs) == 7: - transposed = False - group = inputs[6] - else: - raise ValueError( - "unexpected number of inputs for node {} ({}): {}".format( - node.name, node.kind, len(inputs) - ) - ) + x = inputs[0] + # PyTorch and MIL has same weight layout + # Conv: [Cout, Cin, *D] + # ConvTranspose: [Cin, Cout, *D] + weight = inputs[1] + x, weight = promote_input_dtypes([x, weight]) + + bias = inputs[2] if nargs > 2 else None + stride = inputs[3] if nargs > 3 else 1 + padding = inputs[4] if nargs > 4 else default_torch_padding + + if node.kind in ("_convolution", "convolution"): + dilation = inputs[5] if nargs > 5 else 1 + transposed = inputs[6].val if nargs > 6 else False + out_padding = inputs[7] if nargs > 7 else 0 + groups = inputs[8] if nargs > 8 else 1 + elif re.match(r"conv_transpose[123]d.*", node.kind): + out_padding = inputs[5] if nargs > 5 else 0 + groups = inputs[6] if nargs > 6 else 1 + dilation = inputs[7] if nargs > 7 else 1 + transposed = True + else: + dilation = inputs[5] if nargs > 5 else 1 + groups = inputs[6] if nargs > 6 else 1 + transposed = False + out_padding = 0 + + return x, weight, bias, stride, padding, dilation, groups, transposed, out_padding + + def _parse_keyword_args( + context, node, bias, stride, padding, dilation, groups, out_padding + ) -> Tuple[Var]: + # Only torch.export may have kwargs + if context.frontend != TorchFrontend.TORCHEXPORT: + return bias, stride, padding, dilation, groups, out_padding + + bias = _get_kwinputs(context, node, "bias", default=[bias])[0] + stride = _get_kwinputs(context, node, "stride", default=[stride])[0] + padding = _get_kwinputs(context, node, "padding", default=[padding])[0] + dilation = _get_kwinputs(context, node, "dilation", default=[dilation])[0] + groups = _get_kwinputs(context, node, "groups", default=[groups])[0] + out_padding = _get_kwinputs(context, node, "out_padding", default=[out_padding])[0] + + return bias, stride, padding, dilation, groups, out_padding + + def _translate_torch_args(node, weight, stride, padding, dilation, groups, out_padding): + spatial_rank = weight.rank - 2 + + # Core ML strides comes from torch stride + if isinstance(stride, Var): + stride = stride.val + assert stride is not None, "torch conv stride must be constant" + # Torch stride is an int (for all spatial dims) or an n-tuple of ints (one per spatial dim) + # Core ML requires an n-tuple + if isinstance(stride, int) or len(stride) == 1: + strides = _np.array([np.squeeze(stride)] * spatial_rank) + else: + strides = stride + # 1 is Core ML default value, so using None is preferred + if _np.all(strides == 1): + strides = None + + # Core ML pad_type and pad come from torch padding + # For torch conv op .padding variants, torch padding is a string, + # with possible values ("valid", "same") + if node.kind.endswith(".padding"): + pad_type = padding + if isinstance(pad_type, Var): + assert pad_type.val is not None + pad_type = pad_type.val + assert pad_type in ("valid", "same") + # Core ML pad is None for pad_type "valid" / "same" + pad = None + # For other torch conv op variants, torch 
padding is + # an int (for all spatial dims) or an n-tuple of ints (one per spatial dim) + else: + if isinstance(padding, Var): + padding = padding.val + assert padding is not None, "torch conv padding must be constant" + # Core ML requires a (2 * n)-tuple, start and end for each spatial dim + if isinstance(padding, int) or len(padding) == 1: + pad = _np.array([np.squeeze(padding)] * (2 * spatial_rank)) + else: + assert len(padding) == spatial_rank + pad = _np.repeat(padding, 2) + # Create Core ML pad_type according to Core ML pad + if _np.all(pad == 0): + pad_type = "valid" + # 0 is Core ML default value, so using None is preferred + pad = None + else: + pad_type = "custom" + + # Core ML dilations comes from torch dilation + if isinstance(dilation, Var): + dilation = dilation.val + assert dilation is not None, "torch conv dilation must be constant" + # Torch dilation is an int (for all spatial dims) or an n-tuple of ints (one per spatial dim) + # Core ML requires an n-tuple + if isinstance(dilation, int) or len(dilation) == 1: + dilations = _np.array([np.squeeze(dilation)] * spatial_rank) + else: + dilations = dilation + # 1 is Core ML default value, so using None is preferred + if _np.all(dilations == 1): + dilations = None + + # Core ML groups is torch groups + if isinstance(groups, Var): + groups = groups.val + assert groups is not None, "torch conv groups must be constant" + # 1 is Core ML default value, so using None is preferred + if groups == 1: + groups = None + + if isinstance(out_padding, Var): + out_padding = out_padding.val + assert out_padding is not None, "torch out_padding must be constant" + # 0 is Core ML default value, so using None is preferred + if _np.all(out_padding == 0): + out_padding = None + + return strides, pad_type, pad, dilations, groups, out_padding + + ( + x, + weight, + bias, + stride, + padding, + dilation, + groups, + transposed, + out_padding, + ) = _parse_positional_args(context, node) + bias, stride, padding, dilation, groups, out_padding = _parse_keyword_args( + context, node, bias, stride, padding, dilation, groups, out_padding + ) + strides, pad_type, pad, dilations, groups, out_padding = _translate_torch_args( + node, weight, stride, padding, dilation, groups, out_padding + ) kwargs = { "x": x, "weight": weight, - "strides": strides, - "pad_type": "custom", - "pad": pad, - "dilations": dilations, - "groups": group, + "pad_type": pad_type, "name": node.name, } - # Bias is optional in PyTorch's convolution. if bias is not None: kwargs["bias"] = bias + if pad_type == "custom": + kwargs["pad"] = pad + if strides is not None: + kwargs["strides"] = strides + if dilations is not None: + kwargs["dilations"] = dilations + if groups is not None: + kwargs["groups"] = groups if transposed is True: + pad_len = 2 * (weight.rank - 2) # Transposed convolution # Handle output_padding using pre-pad or post-crop - pre_pad = [0] * len(pad) - post_crop = [0] * len(pad) + pre_pad = [0] * pad_len + post_crop = [0] * pad_len - if out_pad is not None and any(out_pad): - output_padding = [0] * len(pad) + if out_padding is not None and any(out_padding): + output_padding = [0] * pad_len # output padding adds additional padding on one of the side of dimension # i.e. bottom from top-bottom, # right from left-right @@ -1054,16 +1220,14 @@ def _convolution(context, node): # mapping output_padding to simplify further processing! 
# # For ConvTranspose2d: [bottom, right] -> [0, b, 0, r] - output_padding = [ - 0 if i % 2 == 0 else out_pad[i // 2] for i in range(len(pad)) - ] + output_padding = [0 if i % 2 == 0 else out_padding[i // 2] for i in range(pad_len)] if sum(pad) == 0 and any(output_padding): raise ValueError( "ConvTranspose configuration of padding=0 and output_padding > 0 not supported!" ) post_crop = pad.copy() pad *= 0 - for i in range(0, len(pad)): + for i in range(0, pad_len): if post_crop[i] >= output_padding[i]: post_crop[i] -= output_padding[i] else: @@ -1273,9 +1437,20 @@ def relu6(context, node): @register_torch_op def einsum(context, node): - vars = context[node.inputs[1]] - vars = promote_input_dtypes(vars) - equation = context[node.inputs[0]].val + if context.frontend == TorchFrontend.TORCHSCRIPT: + vars = context[node.inputs[1]] + vars = promote_input_dtypes(vars) + equation = context[node.inputs[0]].val + else: + equation = node.inputs[0] + if isinstance(equation, str) and equation in context: + equation = context[equation].val + tensor_names = node.inputs[1] + if isinstance(tensor_names, str) and tensor_names in context: + vars = context[tensor_names] + else: + assert isinstance(tensor_names, tuple) + vars = [context[tensor_name] for tensor_name in tensor_names] x = build_einsum_mil(vars, equation, node.name) context.add(x) @@ -1412,7 +1587,18 @@ def _calculate_pool_output_size(in_dim, kernel, stride, pad_l, pad_r, ceil_mode) return new_pad -def _max_pool(context, node, inputs): +@register_torch_op( + torch_alias=[ + "max_pool2d", + "max_pool3d", + "max_pool1d_with_indices", + "max_pool2d_with_indices", + "max_pool3d_with_indices", + ] +) +def max_pool1d(context, node): + inputs = _get_inputs(context, node, min_expected=3) + x = inputs[0] kernel_sizes = inputs[1] strides = inputs[2] @@ -1447,31 +1633,13 @@ def _max_pool(context, node, inputs): ceil_mode=ceil_mode if spatial_rank <= 2 else False, ) - if node.kind == "max_pool2d_with_indices": + if re.match(r"max_pool[123]d_with_indices", node.kind): # TODO(rdar://117038432) ([Executorch] Handle/Bind other outputs of `max_pool2d_with_indices` op during lowering) context.add((pool, None), torch_name=node.name) else: context.add(pool) -@register_torch_op -def max_pool1d(context, node): - inputs = _get_inputs(context, node, expected=6) - _max_pool(context, node, inputs) - - -@register_torch_op(torch_alias=["max_pool2d_with_indices"]) -def max_pool2d(context, node): - inputs = _get_inputs(context, node, min_expected=3) - _max_pool(context, node, inputs) - - -@register_torch_op -def max_pool3d(context, node): - inputs = _get_inputs(context, node, expected=6) - _max_pool(context, node, inputs) - - @register_torch_op def minimum(context, node): inputs = _get_inputs(context, node, expected=2) @@ -1606,34 +1774,41 @@ def sub(context, node): ] ) def mean(context, node): - inputs = _get_inputs(context, node, min_expected=1) + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=1) + nargs = len(inputs) + + x = inputs[0] + dim = inputs[1] if nargs > 1 else None + keepdim = inputs[2] if nargs > 2 else False + return x, dim, keepdim + + x, dim, keepdim = _parse_positional_args(context, node) + # torch.export may have kwargs + if context.frontend == TorchFrontend.TORCHEXPORT: + if keepdim == False: + keepdim = _get_kwinputs(context, node, "keepdim", default=[keepdim])[0] - x = inputs[0] if types.is_bool(x.dtype): # TODO: In the future when MIL op supports bool, we need to use curr_opset_version to decide 
# if we want to cast or not. x = mb.cast(x=x, dtype="fp32") kwargs = {"x": x, "name": node.name} - # @axes is optional, so omit if None. - axes = None if len(inputs) < 2 else inputs[1] - if axes is not None: - # @axes needs to be a list, but if only one axis was specified in the - # model, it will be constructed as an int. Construct a new constant as a - # list. - if not isinstance(axes.val, _np.ndarray): - axes = mb.const(val=[axes.val], name=axes.name + "_list") - context.add(axes) + # torch dim means Core ML axes + if dim is not None: + # Core ML axes needs to be a list, but if only one dim was specified in torch, + # it will be constructed as an int, so we construct a new constant as a list + if not isinstance(dim.val, _np.ndarray): + axes = mb.const(val=[dim.val], name=dim.name + "_list") + else: + axes = dim.val kwargs["axes"] = axes - # @keep_dims is optional. - if len(inputs) >= 3: - keep_dims = inputs[2] - kwargs["keep_dims"] = keep_dims + # torch keepdim means Core ML keep_dims + if keepdim != False: + kwargs["keep_dims"] = keepdim - # Last input to mean is an optional output tensor. We always expect this to - # be None or absent. - assert len(inputs) <= 3 or inputs[3] is None if node.kind == "sum": res = mb.reduce_sum(**kwargs) elif node.kind == "logsumexp": @@ -1665,7 +1840,7 @@ def unsqueeze(context, node): context.add(unsqueeze) -@register_torch_op(torch_alias=["sym_size.int"]) +@register_torch_op(torch_alias=["sym_size"]) def size(context, node): inputs = _get_inputs(context, node, expected=[1, 2]) x = inputs[0] @@ -1692,7 +1867,7 @@ def _shape_as_tensor(context, node): context.add(shape_node, node.name) -@register_torch_op(torch_alias=["view_copy", "reshape"]) +@register_torch_op(torch_alias=["view_copy", "_unsafe_view", "reshape"]) def view(context, node): inputs = _get_inputs(context, node, expected=2) x = inputs[0] @@ -1727,28 +1902,54 @@ def view(context, node): context.add(view) -@register_torch_op(torch_alias=['constant_pad_nd']) +@register_torch_op(torch_alias=["constant_pad_nd"]) def pad(context, node): - inputs = _get_inputs(context, node) - x = inputs[0] + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs( + context, + node, + expected={TorchFrontend.TORCHSCRIPT: [3, 4]}, + min_expected={TorchFrontend.TORCHEXPORT: 2, TorchFrontend.EXECUTORCH: 2}, + ) + nargs = len(inputs) + if context.frontend == TorchFrontend.TORCHSCRIPT: + assert (node.kind == "pad") == (nargs == 4) + assert (node.kind == "constant_pad_nd") == (nargs == 3) - pad = inputs[1] - if pad.val is not None: - pad = pad.val.reshape((-1, 2))[::-1].reshape(-1).tolist() - missing_dims = x.rank - (len(pad) // 2) - pad = [0, 0] * missing_dims + pad + x = inputs[0] + pad = inputs[1] + if pad.val is not None: + pad = pad.val.reshape((-1, 2))[::-1].reshape(-1).tolist() + missing_dims = x.rank - (len(pad) // 2) + pad = [0, 0] * missing_dims + pad + + if node.kind == "pad": + mode = "constant" + if nargs > 2: + if isinstance(inputs[2], str): + mode = inputs[2] + else: + if isinstance(inputs[2], Var) and inputs[2].val is not None: + mode = inputs[2].val + else: + raise ValueError( + "if pad mode is specified, then it must either be a string, " + "or a constant pymil variable" + ) + assert mode in ("circular", "constant", "reflect", "replicate") + scalar_val = inputs[3] if nargs > 3 else 0.0 + else: + mode = "constant" + scalar_val = inputs[2] if nargs > 2 else 0.0 + if scalar_val is None: + scalar_val = 0.0 + elif isinstance(scalar_val, Var): + assert scalar_val.val is not None + 
scalar_val = float(scalar_val.val) - if len(inputs) == 4: - mode = inputs[2].val - assert mode in ('constant', 'reflect', 'replicate') - val_index = 3 - else: - mode = 'constant' - val_index = 2 + return x, pad, mode, scalar_val - scalar_val = inputs[val_index] if inputs[val_index] else 0.0 - if inputs[val_index] and inputs[val_index].op.op_type == "const": - scalar_val = float(scalar_val.val) + x, pad, mode, scalar_val = _parse_positional_args(context, node) if types.is_complex(x.dtype): real, imag = (mb.pad(x=x, pad=pad, mode=mode, constant_val=scalar_val, name=node.name) for x in (mb.complex_real(data=x), mb.complex_imag(data=x))) @@ -2036,12 +2237,36 @@ def instance_norm(context, node): @register_torch_op def group_norm(context, node): - inputs = _get_inputs(context, node, expected=6) - x = inputs[0] - num_groups = inputs[1].val - weight = inputs[2] - bias = inputs[3] - eps = inputs[4] + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs( + context, + node, + expected={TorchFrontend.TORCHSCRIPT: 6}, + min_expected={TorchFrontend.TORCHEXPORT: 2, TorchFrontend.EXECUTORCH: 2}, + ) + nargs = len(inputs) + + x = inputs[0] + num_groups = inputs[1].val + + weight = inputs[2] if nargs > 2 else None + bias = inputs[3] if nargs > 3 else None + eps = inputs[4].val if nargs > 4 else 1e-5 + + return x, num_groups, weight, bias, eps + + def _parse_keyword_args(context, node, weight, bias) -> Tuple[Var]: + # Only torch.export may have kwargs + if context.frontend != TorchFrontend.TORCHEXPORT: + return weight, bias + + weight = _get_kwinputs(context, node, "weight", default=[weight])[0] + bias = _get_kwinputs(context, node, "bias", default=[bias])[0] + return weight, bias + + x, num_groups, weight, bias, eps = _parse_positional_args(context, node) + weight, bias = _parse_keyword_args(context, node, weight, bias) + n,c = x.shape[0],x.shape[1] # at minimum (N, C) required num_groups = builtins.min(num_groups,c) new_shape = [n, num_groups, c//num_groups] @@ -2062,9 +2287,9 @@ def group_norm(context, node): x = mb.reshape(x=x, shape=new_shape) mean = mb.reduce_mean(x=x, axes=axes_, keep_dims=True) - var = _std(x,axes_,True,False,eps.val) - x = mb.sub(x=x,y=mean) - x = mb.real_div(x=x,y=var) + var = _std(x, axes_, True, False, eps) + x = mb.sub(x=x, y=mean) + x = mb.real_div(x=x, y=var) x = mb.reshape(x=x, shape=input_shape) if weight is not None: weight = mb.reshape(x=weight, shape=weight_shape) @@ -2121,37 +2346,59 @@ def cat(context, node): def is_tensor_empty(var: Var) -> bool: return np.any([size == 0 for size in var.shape]) - inputs = _get_inputs(context, node, min_expected=1) + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=1) + nargs = len(inputs) - xs = inputs[0] - # PyTorch can have empty tensor, which is then ignored - # However, CoreML does not allow such empty tensor, so remove them now - if np.any([is_tensor_empty(x) for x in xs]): - xs = [x for x in xs if not is_tensor_empty(x)] + xs = inputs[0] + # PyTorch can have empty tensor, which is then ignored + # However, CoreML does not allow such empty tensor, so remove them now + if np.any([is_tensor_empty(x) for x in xs]): + xs = [x for x in xs if not is_tensor_empty(x)] - axis = 0 if len(inputs) == 1 else inputs[1] + dim = inputs[1] if nargs > 1 else 0 - concat = mb.concat( - values=promote_input_dtypes(xs), axis=axis, name=node.name - ) + return xs, dim + + def _parse_keyword_args(context, node, dim) -> Var: + # Only torch.export may have kwargs + if 
context.frontend != TorchFrontend.TORCHEXPORT: + return dim + + dim = _get_kwinputs(context, node, "dim", default=[dim])[0] + return dim + + xs, dim = _parse_positional_args(context, node) + dim = _parse_keyword_args(context, node, dim) + + concat = mb.concat(values=promote_input_dtypes(xs), axis=dim, name=node.name) context.add(concat) @register_torch_op def stack(context, node): - inputs = _get_inputs(context, node) + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=1) + nargs = len(inputs) - values = inputs[0] + tensors = inputs[0] - if len(inputs) < 2: - axis = 0 - else: - axis = inputs[1] + dim = inputs[1] if nargs > 1 else 0 - if len(values) == 1: - res = mb.expand_dims(x=values[0], axes=[axis.val], name=node.name) + return tensors, dim + + tensors, dim = _parse_positional_args(context, node) + # torch.export may have kwargs + if context.frontend == TorchFrontend.TORCHEXPORT: + if dim == 0: + dim = _get_kwinputs(context, node, "dim", default=[dim])[0] + if isinstance(dim, Var): + dim = dim.val + + if len(tensors) == 1: + res = mb.expand_dims(x=tensors[0], axes=[dim], name=node.name) else: - res = mb.stack(values=values, axis=axis, name=node.name) + res = mb.stack(values=tensors, axis=dim, name=node.name) context.add(res) @@ -2225,16 +2472,26 @@ def _int(context, node): @register_torch_op(torch_alias=["native_layer_norm"]) def layer_norm(context, node): - inputs = _get_inputs(context, node, min_expected=5) - _input = inputs[0] - normalized_shape = inputs[1] - weight = inputs[2] - bias = inputs[3] - eps = inputs[4] - # cudnn_enable = inputs[5] unused + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs( + context, + node, + expected={TorchFrontend.TORCHSCRIPT: [5, 6]}, + min_expected={TorchFrontend.TORCHEXPORT: 2, TorchFrontend.EXECUTORCH: 2}, + ) + nargs = len(inputs) + + x, normalized_shape = inputs[:2] + + weight = inputs[2] if nargs > 2 else None + bias = inputs[3] if nargs > 3 else None + eps = inputs[4] if nargs > 4 else None + return x, normalized_shape, weight, bias, eps + + x, normalized_shape, weight, bias, eps = _parse_positional_args(context, node) layer_norm = mb.layer_norm( - x=_input, + x=x, axes=list(range(-len(normalized_shape.val), 0)), gamma=weight, beta=bias, @@ -3075,7 +3332,7 @@ def upsample_linear1d(context, node): context.add(x) -@register_torch_op +@register_torch_op(torch_alias=["upsample_bilinear2d.vec"]) def upsample_bilinear2d(context, node): inputs = _get_inputs(context, node) _input = inputs[0] @@ -3200,7 +3457,7 @@ def upsample_nearest1d(context, node): context.add(x) -@register_torch_op +@register_torch_op(torch_alias=["upsample_nearest2d.vec"]) def upsample_nearest2d(context, node): inputs = _get_inputs(context, node) _input = inputs[0] @@ -3211,6 +3468,7 @@ def upsample_nearest2d(context, node): if ( scale_factors is not None + and isinstance(scale_factors, Var) and scale_factors.val is not None and scale_factors.rank == 1 and scale_factors.shape[0] == 2 @@ -3219,6 +3477,10 @@ def upsample_nearest2d(context, node): scale_factors = scale_factors.val scales_h = scale_factors[0] scales_w = scale_factors[1] + elif scale_factors is not None and isinstance(scale_factors, list) and len(scale_factors) == 2: + # get scale factors from provided inputs + scales_h = scale_factors[0] + scales_w = scale_factors[1] elif ( isinstance(output_size, list) and output_size[0].val is None @@ -3531,7 +3793,7 @@ def _false_path(): context.add(output_var, torch_name=output_name) 
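
The converters rewritten above (group_norm, cat, stack, layer_norm) all follow the same two-step argument handling: parse positional args with a frontend-dependent arity check, then let torch.export keyword arguments override the defaults via _get_kwinputs. A minimal sketch of that pattern, assuming the same module context and helpers (_get_inputs, _get_kwinputs, register_torch_op, TorchFrontend) as ops.py; the op name example_op and its alpha argument are hypothetical, not ops added by this patch:

    @register_torch_op
    def example_op(context, node):
        def _parse_positional_args(context, node) -> Tuple[Var]:
            # TorchScript serializes every positional arg, so the count is exact;
            # torch.export may drop trailing defaults, so only a lower bound is checked.
            inputs = _get_inputs(
                context,
                node,
                expected={TorchFrontend.TORCHSCRIPT: 2},
                min_expected={TorchFrontend.TORCHEXPORT: 1, TorchFrontend.EXECUTORCH: 1},
            )
            nargs = len(inputs)
            x = inputs[0]
            alpha = inputs[1] if nargs > 1 else None
            return x, alpha

        def _parse_keyword_args(context, node, alpha) -> Var:
            # Only torch.export graphs carry kwargs; other frontends keep the default.
            if context.frontend != TorchFrontend.TORCHEXPORT:
                return alpha
            return _get_kwinputs(context, node, "alpha", default=[alpha])[0]

        x, alpha = _parse_positional_args(context, node)
        alpha = _parse_keyword_args(context, node, alpha)
        # ... build Core ML ops from x and alpha, then context.add(...) as usual ...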
-@register_torch_op(torch_alias=["select.int", "select_copy.int"]) +@register_torch_op(torch_alias=["select_copy"]) def select(context, node): inputs = _get_inputs(context, node, expected=3) _input = inputs[0] @@ -3706,29 +3968,27 @@ def _expand_list_to_rank_1(arr): def _translate_torch_tensor_assign( - x, - updates, - begin, - end, - stride, - begin_mask, - end_mask, - squeeze_mask, - name, + x: Var, + updates: Var, + begin: Var, + end: Var, + stride=None, + begin_mask=None, + end_mask=None, + squeeze_mask=None, + name=None, ): - - def torch_tensor_assign_implementation() -> Var: - return mb.torch_tensor_assign( - x=x, - updates=updates, - begin=begin, - end=end, - stride=stride, - begin_mask=begin_mask, - end_mask=end_mask, - squeeze_mask=squeeze_mask, - name=name, - ) + translation_kwargs = {} + if stride is not None: + translation_kwargs["stride"] = stride + if begin_mask is not None: + translation_kwargs["begin_mask"] = begin_mask + if end_mask is not None: + translation_kwargs["end_mask"] = end_mask + if squeeze_mask is not None: + translation_kwargs["squeeze_mask"] = squeeze_mask + if name is not None: + translation_kwargs["name"] = name if is_current_opset_version_compatible_with(target.iOS18): # slice_update is not supporting scalar update at runtime. @@ -3742,7 +4002,13 @@ def torch_tensor_assign_implementation() -> Var: if isinstance(var, Var) and var.val is None: is_begin_or_end_dynamic = True if is_begin_or_end_dynamic or any_symbolic(x.shape): - return torch_tensor_assign_implementation() + return mb.torch_tensor_assign( + x=x, + updates=updates, + begin=begin, + end=end, + **translation_kwargs, + ) # First pick up the ``dim`` in which ``squeeze_mask[dim] = True``, # and do the following transformation: @@ -3775,14 +4041,16 @@ def torch_tensor_assign_implementation() -> Var: update=updates, begin=begin, end=end, - stride=stride, - begin_mask=begin_mask, - end_mask=end_mask, - squeeze_mask=squeeze_mask, - name=name, + **translation_kwargs, ) - return torch_tensor_assign_implementation() + return mb.torch_tensor_assign( + x=x, + updates=updates, + begin=begin, + end=end, + **translation_kwargs, + ) @register_torch_op @@ -3875,9 +4143,6 @@ def select_scatter(context, node): updates=updates, begin=begin, end=end, - stride=None, - begin_mask=None, - end_mask=None, squeeze_mask=squeeze_mask, name=node.name, ) @@ -4023,6 +4288,14 @@ def index_put(context, node): ), f"indices shape {indices.shape} must equal to input shape {x.shape} for index put operation." 
indices = mb.cast(x=indices, dtype="int32") indices = mb.non_zero(x=indices) + + # if the indices is all False, + # we translate the op into identity + if 0 in indices.shape: + result = mb.identity(x=x, name=node.name) + context.add(result) + return + # values if values.shape == (): values = mb.expand_dims(x=values, axes=[0]) @@ -4262,7 +4535,7 @@ def ones(context, node): context, node, expected={TorchFrontend.TORCHSCRIPT: [5, 6]}, - min_expected={TorchFrontend.EXIR: 1} + min_expected={TorchFrontend.TORCHEXPORT: 1, TorchFrontend.EXECUTORCH: 1}, ) size = inputs[0] # dtype = NUM_TO_TORCH_DTYPE[inputs[1].val] unused @@ -4293,6 +4566,15 @@ def ones_like(context, node): context.add(fill) +@register_torch_op +def fill(context, node): + inputs = _get_inputs(context, node, expected=2) + shape = inputs[0].shape + value = inputs[1].val + result = mb.fill(shape=shape, value=value, name=node.name) + context.add(result) + + def _make_fill_op(size, val, name): assert val is not None if isinstance(size, list): @@ -4352,12 +4634,23 @@ def new_full(context, node): result = _make_fill_op(size, val, node.name) context.add(result) -@register_torch_op + +@register_torch_op(torch_alias=["randint.low"]) def randint(context, node): - inputs = _get_inputs(context, node, expected=(7, 8)) - low = mb.cast(x=inputs[0], dtype="fp32") - high = mb.cast(x=inputs[1], dtype="fp32") - shape = inputs[2] + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=2) + if context.frontend == TorchFrontend.TORCHSCRIPT or node.kind == "randint.low": + low = mb.cast(x=inputs[0], dtype="fp32") + high = mb.cast(x=inputs[1], dtype="fp32") + shape = inputs[2].val + else: + assert node.kind == "randint" + low = 0.0 + high = mb.cast(x=inputs[0], dtype="fp32") + shape = inputs[1].val + return low, high, shape + + low, high, shape = _parse_positional_args(context, node) rand_uniform = mb.random_uniform(shape=shape, low=low, high=high) rand_int = mb.cast(x=rand_uniform, dtype="int32", name=node.name) context.add(rand_int) @@ -4485,7 +4778,7 @@ def avg_pool1d(context, node): context, node, expected={TorchFrontend.TORCHSCRIPT : 6}, - min_expected={TorchFrontend.EXIR : 2}, + min_expected={TorchFrontend.TORCHEXPORT: 2, TorchFrontend.EXECUTORCH: 2}, ) _avg_pool(context, node, inputs) @@ -4495,7 +4788,11 @@ def avg_pool2d(context, node): inputs = _get_inputs( context, node, - min_expected={TorchFrontend.TORCHSCRIPT : 6, TorchFrontend.EXIR : 2}, + min_expected={ + TorchFrontend.TORCHSCRIPT: 6, + TorchFrontend.TORCHEXPORT: 2, + TorchFrontend.EXECUTORCH: 2, + }, ) divisor_override = None if len(inputs) < 7 else inputs[6] if divisor_override is not None: @@ -4509,7 +4806,7 @@ def avg_pool3d(context, node): context, node, expected={TorchFrontend.TORCHSCRIPT : 7}, - min_expected={TorchFrontend.EXIR : 2}, + min_expected={TorchFrontend.TORCHEXPORT: 2, TorchFrontend.EXECUTORCH: 2}, ) divisor_override = inputs[6] if divisor_override is not None: @@ -4519,14 +4816,17 @@ def avg_pool3d(context, node): @register_torch_op(torch_alias=["_log_softmax"]) def log_softmax(context, node): - inputs = _get_inputs(context, node) + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=2) + nargs = len(inputs) - x = inputs[0] - axis = inputs[1] + x = inputs[0] + axis = inputs[1] + # input 2 is dtype, so we ignore + + return x, axis - # input 2 is either out or half_to_float, so we ignore - ignored = inputs[2] - assert ignored is None or ignored.dtype == types.bool + 
x, axis = _parse_positional_args(context, node) res = mb.softmax(x=x, axis=axis, name=node.name + "_softmax") res = mb.log(x=res, name=node.name) @@ -4613,26 +4913,47 @@ def gelu(context, node): @register_torch_op(torch_alias=["_slice", "slice_copy"]) def slice(context, node): - inputs = _get_inputs( - context, - node, - expected={TorchFrontend.TORCHSCRIPT : 5}, - min_expected={TorchFrontend.EXIR : 1}, - ) - x = inputs[0] - dim = 0 if len(inputs) < 2 else inputs[1].val - - start = 0 - if len(inputs) > 2 and inputs[2] is not None: - start = inputs[2].val if inputs[2].val is not None else inputs[2] - - end = None - if len(inputs) > 3 and inputs[3] is not None: - end = inputs[3].val if inputs[3].val is not None else inputs[3] + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs( + context, + node, + expected=(1, 2, 3, 4, 5), + ) + nargs = len(inputs) - step = 1 - if len(inputs) > 4 and inputs[4] is not None: - step = inputs[4].val if inputs[4].val is not None else inputs[4] + x = inputs[0] + dim = inputs[1].val if nargs > 1 else 0 + start = None + if nargs > 2: + start = inputs[2] + if isinstance(start, Var) and start.val is not None: + start = start.val + end = None + if nargs > 3: + end = inputs[3] + if isinstance(end, Var) and end.val is not None: + end = end.val + step = inputs[4].val if nargs > 4 else 1 + return x, dim, start, end, step + + x, dim, start, end, step = _parse_positional_args(context, node) + # torch.export may have kwargs + if context.frontend == TorchFrontend.TORCHEXPORT: + if dim == 0: + dim = _get_kwinputs(context, node, "dim", default=[dim])[0] + if start is None: + start = _get_kwinputs(context, node, "start", default=[start])[0] + if end is None: + end = _get_kwinputs(context, node, "end", default=[end])[0] + if step == 1: + step = _get_kwinputs(context, node, "step", default=[step])[0] + # torch start = None means Core ML start = 0 + if start is None: + start = 0 + # dim must be constant + if isinstance(dim, Var): + dim = dim.val + assert dim is not None if start == 0 and end is None and step == 1: # Handling x[:], just pass through the tensor. 
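
For context on what the slice parser above receives: Python slicing in a traced or exported graph lowers to aten::slice.Tensor(x, dim, start, end, step), with omitted bounds arriving as None, which is why the converter maps a None start to 0 and defaults dim/step to 0/1. A small illustration of that correspondence (tensor contents are arbitrary; the exact number of slice nodes emitted can vary by torch version):

    import torch

    x = torch.arange(12).reshape(3, 4)
    # x[:, 1::2] corresponds roughly to aten::slice.Tensor(x, dim=1, start=1, end=None, step=2)
    y = x[:, 1::2]
    assert y.shape == (3, 2)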
@@ -4674,10 +4995,31 @@ def slice(context, node): @register_torch_op(torch_alias=["split_with_sizes", "split_with_sizes_copy"]) def split(context, node): - inputs = _get_inputs(context, node, expected=3) - x = inputs[0] - split_sizes = inputs[1] - dim = inputs[2].val + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=2) + nargs = len(inputs) + + x = inputs[0] + split_sizes = inputs[1] + dim = inputs[2] if nargs > 2 else 0 + return x, split_sizes, dim + + def _parse_keyword_args(context, node, dim) -> Var: + # Only torch.export may have kwargs + if context.frontend != TorchFrontend.TORCHEXPORT: + return dim + + dim = _get_kwinputs(context, node, "dim", default=[dim])[0] + return dim + + def _translate_torch_args(dim) -> Var: + if isinstance(dim, Var): + dim = dim.val + return dim + + x, split_sizes, dim = _parse_positional_args(context, node) + dim = _parse_keyword_args(context, node, dim) + dim = _translate_torch_args(dim) if not isinstance(split_sizes.val, _np.ndarray): shape = mb.shape(x=x) @@ -4703,21 +5045,25 @@ def split(context, node): context.add(res, torch_name=node.name) -@register_torch_op(torch_alias=["unbind.int"]) +@register_torch_op def unbind(context, node): - inputs = _get_inputs( - context, - node, - expected={ - TorchFrontend.TORCHSCRIPT: 2, - TorchFrontend.EXIR: [1, 2], - }, - ) - x = inputs[0] - if len(inputs) == 1: - dim = 0 - else: - dim = inputs[1].val + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, expected=(1, 2)) + nargs = len(inputs) + + x = inputs[0] + dim = inputs[1] if nargs > 1 else 0 + + return x, dim + + x, dim = _parse_positional_args(context, node) + # torch.export may have kwargs + if context.frontend == TorchFrontend.TORCHEXPORT: + if dim == 0: + dim = _get_kwinputs(context, node, "dim", default=[dim])[0] + if isinstance(dim, Var): + dim = dim.val + split_sizes = [1] * x.shape[dim] if len(split_sizes) == 1: res = [mb.squeeze(x=x, axes=[dim])] @@ -4890,6 +5236,49 @@ def expand_as(context, node): context.add(res) +@register_torch_op( + torch_alias=[ + "atleast_2d", + "atleast_3d", + "atleast_1d.sequence", + "atleast_2d.sequence", + "atleast_3d.sequence", + ] +) +def atleast_1d(context, node): + def _maybe_expand_dims(x: Var, rank: int, name: Optional[str] = None) -> Var: + if x.rank < rank: + if rank == 3: + if x.rank == 2: + axes = [2] + elif x.rank == 1: + axes = [0, 2] + else: + axes = [0, 1, 2] + else: + axes = [*range(rank - x.rank)] + kwargs = {"x": x, "axes": axes} + if name is not None: + kwargs["name"] = name + x = mb.expand_dims(**kwargs) + return x + + inputs = _get_inputs(context, node, expected=1)[0] + rank = int(node.kind[8]) + assert rank in (1, 2, 3) + + if isinstance(inputs, (tuple, list)): + results = [] + for x in inputs: + results.append(_maybe_expand_dims(x, rank)) + else: + assert isinstance(inputs, Var) + x = inputs + results = _maybe_expand_dims(x, rank, node.name) + + context.add(results, torch_name=node.name) + + def _arange( context, node_name: str, @@ -4905,32 +5294,60 @@ def _arange( context.add(res) -@register_torch_op +@register_torch_op(torch_alias=["arange.start"]) def arange(context, node): - inputs = _get_inputs(context, node) - # dtype = inputs[-4] - # layout = inputs[-3] - # device = inputs[-2] - # pin_memory = inputs[-1] - if len(inputs) == 1 or len(inputs) == 5: - # inputs are [end] or [end, dtype, layout, device, pin_memory] - start = 0 - end = inputs[0] - step = 1 - elif len(inputs) == 6: - # inputs are 
[start, end, dtype, layout, device, pin_memory] - start = inputs[0] - end = inputs[1] - step = 1 - elif len(inputs) == 7: - # inputs are [start, end, step, dtype, layout, device, pin_memory] - start = inputs[0] - end = inputs[1] - step = inputs[2] - else: - raise ValueError( - "arange must have exactly 5, 6, or 7 inputs, got {}".format(len(inputs)) - ) + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=1) + nargs = len(inputs) + + if context.frontend == TorchFrontend.TORCHSCRIPT: + # dtype = inputs[-4] + # layout = inputs[-3] + # device = inputs[-2] + # pin_memory = inputs[-1] + if nargs == 1 or nargs == 5: + # inputs are [end] or [end, dtype, layout, device, pin_memory] + start = 0 + end = inputs[0] + step = 1 + elif nargs == 6: + # inputs are [start, end, dtype, layout, device, pin_memory] + start = inputs[0] + end = inputs[1] + step = 1 + elif nargs == 7: + # inputs are [start, end, step, dtype, layout, device, pin_memory] + start = inputs[0] + end = inputs[1] + step = inputs[2] + else: + raise ValueError(f"arange must have exactly 5, 6, or 7 inputs, got {nargs}") + else: + if re.match(r"arange\.start.*", node.kind): + start = inputs[0] + assert nargs > 1, "arange.start has at least 2 positional args: start, end" + end = inputs[1] + if node.kind == "arange.start_step": + step = inputs[2] if nargs > 2 else 1 + else: + step = 1 + else: + start = 0 + end = inputs[0] + step = 1 + + return start, end, step + + def _parse_keyword_args(context, node, step) -> Var: + # Only torch.export may have kwargs + if context.frontend != TorchFrontend.TORCHEXPORT: + return step + + step = _get_kwinputs(context, node, "step", default=[step])[0] + return step + + start, end, step = _parse_positional_args(context, node) + step = _parse_keyword_args(context, node, step) _arange(context, node.name, start, end, step) @@ -4964,7 +5381,7 @@ def masked_fill(context, node): context.add(res) -@register_torch_op +@register_torch_op(torch_alias=["meshgrid.indexing"]) def meshgrid(context, node): """ For N input tensors, a meshgrid is constructed by viewing each tensor as an N-dimension tensor @@ -4976,22 +5393,31 @@ def meshgrid(context, node): N, N-dimenional grids, where the ith grid is defined as expanding the ith input over dimensions defined by the other inputs. 
""" - supported_indexing_modes = ("ij", "xy") - indexing = "ij" - inputs = _get_inputs(context, node, expected=[1, 2]) - if len(inputs) == 2: - indexing = inputs[1].val - if indexing not in supported_indexing_modes: - raise ValueError("indexing mode {} not supported".format(indexing)) + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, expected=[1, 2]) + nargs = len(inputs) + + tensor_inputs = inputs[0] + indexing = inputs[1].val if nargs > 1 else "ij" + return tensor_inputs, indexing - tensor_inputs = inputs[0] - assert isinstance(tensor_inputs, (list, tuple)) - if len(tensor_inputs) < 2: - raise ValueError("Requires >= 2 tensor inputs.") + def _check_args(tensor_inputs, indexing) -> None: + assert isinstance(tensor_inputs, (list, tuple)) + if len(tensor_inputs) < 2: + raise ValueError("Requires >= 2 tensor inputs.") + if any([len(tensor_var.shape) > 1 for tensor_var in tensor_inputs]): + raise ValueError("meshgrid received non-1d tensor.") - if any([len(tensor_var.shape) > 1 for tensor_var in tensor_inputs]): - raise ValueError("meshgrid received non-1d tensor.") + if indexing not in ("ij", "xy"): + raise ValueError(f"indexing mode {indexing} not supported") + + tensor_inputs, indexing = _parse_positional_args(context, node) + # torch.export may have kwargs + if context.frontend == TorchFrontend.TORCHEXPORT: + if indexing == "ij": + indexing = _get_kwinputs(context, node, "indexing", default=[indexing])[0] + _check_args(tensor_inputs, indexing) dim_tuple = tuple(tensor_var.shape[0] for tensor_var in tensor_inputs) @@ -5025,6 +5451,9 @@ def meshgrid(context, node): # Defines all the nodes that are noOps @register_torch_op( torch_alias=[ + "_assert_async.msg", + "_assert_scalar", + "_local_scalar_dense", "alias_copy", "clone", "contiguous", @@ -5038,9 +5467,14 @@ def meshgrid(context, node): ) def noop(context, node): logger.info(f"Setting pytorch op: {node.kind} to no-op.") - inputs = _get_inputs(context, node) - _input = inputs[0] - context.add(_input, torch_name=node.name) + # These noops do not produce output + if node.kind in ("_assert_scalar",): + return + # Other noops return input as output + else: + inputs = _get_inputs(context, node) + _input = inputs[0] + context.add(_input, torch_name=node.name) @register_torch_op @@ -5062,7 +5496,7 @@ def zeros_like(context, node): context, node, expected={TorchFrontend.TORCHSCRIPT: 6}, - min_expected={TorchFrontend.EXIR: 1}, + min_expected={TorchFrontend.TORCHEXPORT: 1, TorchFrontend.EXECUTORCH: 1}, ) x = inputs[0] shape = mb.shape(x=x) @@ -5295,12 +5729,26 @@ def repeat(context, node): context.add(mb.tile(x=x, reps=reps, name=node.name)) -@register_torch_op +@register_torch_op(torch_alias=["repeat_interleave.self_tensor", "repeat_interleave.self_int"]) def repeat_interleave(context, node): """ For now, we only support scalar repeats + None or 0 dim """ + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs( + context, + node, + expected={TorchFrontend.TORCHSCRIPT: 4}, + min_expected={TorchFrontend.TORCHEXPORT: 2, TorchFrontend.EXECUTORCH: 2}, + ) + nargs = len(inputs) + + x = inputs[0] + repeats = inputs[1] + dim = inputs[2] if nargs > 2 else None + return x, repeats, dim + def repeat_interleave_dim0(x: Var, repeats_val: int, name: str = None) -> Var: """ on a high level: @@ -5321,27 +5769,35 @@ def repeat_interleave_dim0(x: Var, repeats_val: int, name: str = None) -> Var: result """ + translation_kwargs = {} + if name is not None: + translation_kwargs["name"] = name + + 
x_shape = mb.shape(x=x) + reps = [1] * x.rank reps[0] = repeats_val x_tiled = mb.tile(x=x, reps=reps) - split_reps = [repeats_val] + list(x.shape) - x_reshaped = mb.reshape(x=x_tiled, shape=list(split_reps)) + split_reps_shape = mb.concat(values=([repeats_val], x_shape), axis=0) + x_reshaped = mb.reshape(x=x_tiled, shape=split_reps_shape) perm = [*range(x.rank + 1)] perm[0] = 1 perm[1] = 0 x_transposed = mb.transpose(x=x_reshaped, perm=perm) - result_shape = list(x.shape) - result_shape[0] = -1 - if name is None: - result = mb.reshape(x=x_transposed, shape=result_shape) - else: - result = mb.reshape(x=x_transposed, shape=result_shape, name=node.name) + x_unaffected_sizes = mb.slice_by_index(x=x_shape, begin=[1], end=[x.rank]) + result_shape = mb.concat(values=([-1], x_unaffected_sizes), axis=0) + result = mb.reshape(x=x_transposed, shape=result_shape, **translation_kwargs) + return result - x, repeats, dim, _ = _get_inputs(context, node, expected=4) + x, repeats, dim = _parse_positional_args(context, node) + # torch.export may have kwargs + if context.frontend == TorchFrontend.TORCHEXPORT: + if dim is None: + dim = _get_kwinputs(context, node, "dim", default=[dim])[0] repeats_val = repeats.val if isinstance(repeats_val, np.ndarray): @@ -5357,17 +5813,26 @@ def repeat_interleave_dim0(x: Var, repeats_val: int, name: str = None) -> Var: if dim is None: x = mb.reshape(x=x, shape=(-1,)) else: + dim_val = dim.val + assert dim_val is not None, "torch.repeat_interleave uses static dim" + if dim_val < 0: + dim_val += x.rank # non-0 dim requires additional pre and post treatment - if dim.val != 0: + if dim_val != 0: is_dim_0 = False + # quick return: repeat 1 is noop + if repeats_val == 1: + context.add(x, torch_name=node.name) + return + if is_dim_0: result = repeat_interleave_dim0(x, repeats_val, node.name) else: # pre treatment: permute to have dim 0 - perm2dim0 = [dim.val] + perm2dim0 = [dim_val] for i in range(x.rank): - if i != dim.val: + if i != dim_val: perm2dim0.append(i) x = mb.transpose(x=x, perm=perm2dim0) @@ -5523,11 +5988,18 @@ def clamp(context, node): @register_torch_op def triu(context, node): - inputs = _get_inputs(context, node, expected=2) + assert context.frontend != TorchFrontend.EXECUTORCH, "triu is not a core aten op" + inputs = _get_inputs( + context, + node, + expected={ + TorchFrontend.TORCHSCRIPT: 2, + TorchFrontend.TORCHEXPORT: [1, 2], + }, + ) x = inputs[0] - diagonal = inputs[1] - if diagonal is not None and diagonal.val is not None: - diagonal = diagonal.val + if len(inputs) > 1 and inputs[1] is not None and inputs[1].val is not None: + diagonal = inputs[1].val else: diagonal = 0 if diagonal <= 0: @@ -5540,11 +6012,18 @@ def triu(context, node): @register_torch_op def tril(context, node): - inputs = _get_inputs(context, node, expected=2) + assert context.frontend != TorchFrontend.EXECUTORCH, "tril is not a core aten op" + inputs = _get_inputs( + context, + node, + expected={ + TorchFrontend.TORCHSCRIPT: 2, + TorchFrontend.TORCHEXPORT: [1, 2], + }, + ) x = inputs[0] - diagonal = inputs[1] - if diagonal is not None and diagonal.val is not None: - diagonal = diagonal.val + if len(inputs) > 1 and inputs[1] is not None and inputs[1].val is not None: + diagonal = inputs[1].val else: diagonal = 0 if diagonal >= 0: @@ -5911,7 +6390,7 @@ def copy(context, node): "In torch script frontend, by graph pass `generate_tensor_assignment_ops`, " "`torch.copy_` should have been replaced with `_internal_op_tensor_inplace_copy`" ) - if context.frontend == TorchFrontend.EXIR: + if 
context.frontend in TORCH_EXPORT_BASED_FRONTENDS: src = inputs[1] if inputs[0].shape != src.shape: _, src = _broadcast_tensors(inputs[: 2]) @@ -6173,10 +6652,31 @@ def _solve_broadcast_shape(shapes: List[List[int]]) -> List[np.ndarray]: dims = [shapes[j][i] for j in range(len(shapes))] if any_symbolic(dims): # rdar://85559497 (Handle dynamic shapes inputs broadcast for pytorch) - raise NotImplementedError( - "Only static shaped inputs are supported for torch.broadcast_tensors conversion." - ) - result_shape.append(_np.max(dims)) + symbols = set() + integers = set() + for dim in dims: + if is_symbolic(dim): + symbols.add(dim) + else: + integers.add(dim) + # Integers can be safely ignored + if integers == {1} or integers == set(): + result_dim = list(symbols)[0] + result_shape.append(result_dim) + # In principle, there must be only 1 symbol + # In practise, since our symbol propagation is imperfect, + # we may see multiple symbols, even if they must equal to each other / 1 + if len(symbols) != 1: + logger.warning(f"Recklessly broadcast {symbols} to {result_dim}") + # In principle, in such case the symbols must be 1 or equal to the integer + # In practise, since our symbol propagation is imperfect, + # we may still see symbols, even if they must equal to max integer / 1 + else: + result_dim = _np.max(list(integers)) + result_shape.append(result_dim) + logger.warning(f"Recklessly broadcast {symbols} and {integers} to {result_dim}") + else: + result_shape.append(_np.max(dims)) return result_shape def _broadcast_tensors(tensors): @@ -6859,7 +7359,13 @@ def _cast_bool_attn_mask(attn_mask: Var, query_var: Var) -> Var: ) return mb.mul(x=-3e4, y=compliment_of_mask) -@register_torch_op(torch_alias=["_scaled_dot_product_flash_attention_for_cpu"]) +@register_torch_op( + torch_alias=[ + "_scaled_dot_product_flash_attention_for_cpu", + "coreml.sdpa", + "coreml::sdpa", + ] +) def scaled_dot_product_attention(context, node): """ Input shapes/types: @@ -6888,47 +7394,70 @@ def _broadcast_tensor_to_same_batch_dims(x: Var, batch_dims: List[int]) -> Var: broadcast_shape = batch_dims + list(x.shape[-2:]) return _broadcast(x.name + "_broadcast_same_batch_dims", x, broadcast_shape) - inputs = _get_inputs(context, node, min_expected=3) - q, k, v = inputs[:3] - attn_mask = None if len(inputs) < 4 else inputs[3] - dropout = 0.0 if len(inputs) < 5 else inputs[4] - is_causal = False if len(inputs) < 6 else inputs[5].val + def _parse_positional_args(context, node) -> Tuple[Var]: + inputs = _get_inputs(context, node, min_expected=3) + nargs = len(inputs) + + q, k, v = inputs[:3] + + if node.kind == "scaled_dot_product_attention": + attn_mask = inputs[3] if nargs > 3 else None + dropout = inputs[4] if nargs > 4 else 0.0 + is_causal = inputs[5].val if nargs > 5 else False + scale = inputs[6] if nargs > 6 else None + elif node.kind == "_scaled_dot_product_flash_attention_for_cpu": + dropout = inputs[3] if nargs > 3 else 0.0 + is_causal = inputs[4].val if nargs > 4 else False + attn_mask = inputs[5] if nargs > 5 else None + scale = inputs[6] if nargs > 6 else None + else: + assert node.kind in ("coreml.sdpa", "coreml::sdpa") + attn_mask = inputs[3] if nargs > 3 else None + dropout = 0.0 + is_causal = False + scale = None - # When len(inputs) == 7, the inputs are (q, k, v, attn_mask, dropout, is_causal, scale) - if len(inputs) == 7 and inputs[6] is not None: - raise NotImplementedError( - "scaled_dot_product_attention op: scale parameter is not handled." 
- ) + return q, k, v, attn_mask, dropout, is_causal, scale - if attn_mask is not None and is_causal: - raise ValueError( - "scaled_dot_product_attention op: attn_mask cannot be provided when is_causal is set to True." - ) + def _check_args(q, k, v, attn_mask, dropout, is_causal, scale) -> None: + if attn_mask is not None and is_causal: + raise ValueError( + "scaled_dot_product_attention op: attn_mask cannot be provided when is_causal is set to True." + ) - if dropout is not None: - if isinstance(dropout, Var): - if dropout.val is None: - raise NotImplementedError( - "A variable dropout probability is specified. Since Core ML " - "does not support dropout yet, we cowardly refuse to convert it" + if dropout is not None: + if isinstance(dropout, Var): + if dropout.val is None: + raise NotImplementedError( + "A variable dropout probability is specified. Since Core ML " + "does not support dropout yet, we cowardly refuse to convert it" + ) + else: + dropout = dropout.val + if dropout != 0.0: + raise ValueError( + "A non-zero dropout probability is specified. Since Core ML " + "does not support dropout yet, we cannot convert it" ) - else: - dropout = dropout.val - if dropout != 0.0: + + # check that ranks of q, k, v and attn_mask match + if k.rank != q.rank: raise ValueError( - "A non-zero dropout probability is specified. Since Core ML " - "does not support dropout yet, we cannot convert it" + "Rank of query and key do not match in scaled_dot_product_attention torch op" + ) + if v.rank != q.rank: + raise ValueError( + "Rank of query and value do not match in scaled_dot_product_attention torch op" ) - # check that ranks of q, k, v and attn_mask match - if k.rank != q.rank: - raise ValueError( - "Rank of query and key do not match in scaled_dot_product_attention torch op" - ) - if v.rank != q.rank: - raise ValueError( - "Rank of query and value do not match in scaled_dot_product_attention torch op" - ) + q, k, v, attn_mask, dropout, is_causal, scale = _parse_positional_args(context, node) + # torch.export may have kwargs + if context.frontend == TorchFrontend.TORCHEXPORT: + if attn_mask is None: + attn_mask = _get_kwinputs(context, node, "attn_mask", default=[attn_mask])[0] + if scale is None: + scale = _get_kwinputs(context, node, "scale", default=[scale])[0] + _check_args(q, k, v, attn_mask, dropout, is_causal, scale) mask = None if is_causal: @@ -6941,7 +7470,8 @@ def _broadcast_tensor_to_same_batch_dims(x: Var, batch_dims: List[int]) -> Var: mask = attn_mask # Since ios18, Core ML supports scaled_dot_product_attention op - if is_current_opset_version_compatible_with(target.iOS18): + # It does not have scale, though + if is_current_opset_version_compatible_with(target.iOS18) and scale is None: # ios18 scaled_dot_product_attention only supports rank >= 3 is_rank_2 = q.rank == 2 @@ -6972,7 +7502,7 @@ def _broadcast_tensor_to_same_batch_dims(x: Var, batch_dims: List[int]) -> Var: # For ios18-, scaled_dot_product_attention has to be decomposed else: - res = _utils._decompose_scaled_dot_product_attention(q, k, v, mask, node.name) + res = _utils._decompose_scaled_dot_product_attention(q, k, v, mask, node.name, scale=scale) context.add(res) diff --git a/coremltools/converters/mil/frontend/torch/quantization_ops.py b/coremltools/converters/mil/frontend/torch/quantization_ops.py index e47965850..11aab6f4c 100644 --- a/coremltools/converters/mil/frontend/torch/quantization_ops.py +++ b/coremltools/converters/mil/frontend/torch/quantization_ops.py @@ -3,12 +3,14 @@ # Use of this source code is governed 
by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause - import numpy as _np import torch as _torch +from packaging.version import Version from coremltools import _logger as logger +from coremltools._deps import _HAS_TORCHAO, MSG_TORCHAO_NOT_FOUND from coremltools.converters.mil.frontend import _utils +from coremltools.converters.mil.frontend.torch.ops import NUM_TO_NUMPY_DTYPE from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Var, types @@ -16,12 +18,17 @@ from .torch_op_registry import register_torch_op from .utils import ( NUM_TO_TORCH_DTYPE, + TORCH_DTYPE_TO_NUM, + TORCH_EXPORT_BASED_FRONTENDS, TORCH_QTYPE_TO_NP_TYPE, TORCH_QTYPE_TO_STR, TYPE_TO_DTYPE_STRING, TorchFrontend, ) +if _HAS_TORCHAO: + from torchao.quantization import quant_primitives as torchao_quant + def _quantize_general( context, @@ -94,17 +101,22 @@ def quantize_per_tensor(context, node): inputs = _get_inputs( context, node, - expected={TorchFrontend.TORCHSCRIPT: 4, TorchFrontend.EXIR: 6}, + expected={ + TorchFrontend.TORCHSCRIPT: 4, + TorchFrontend.TORCHEXPORT: 6, + TorchFrontend.EXECUTORCH: 6, + }, ) - assert context.frontend in (TorchFrontend.TORCHSCRIPT, TorchFrontend.EXIR) if context.frontend == TorchFrontend.TORCHSCRIPT: input, scale, zero_point, torch_dtype = inputs - elif context.frontend == TorchFrontend.EXIR: + elif context.frontend in TORCH_EXPORT_BASED_FRONTENDS: input, scale, zero_point, qmin, qmax, torch_dtype = inputs if qmax.val - qmin.val <= 16: logger.warning( f"Core ML does not support 4-bit activation, so {torch_dtype.val} is used instead" ) + else: + raise ValueError(f"Invalid PyTorch frontend {context.frontend}") _quantize_general(context, node, input, scale, zero_point, torch_dtype) @@ -119,6 +131,17 @@ def quantize_per_channel(context, node): _quantize_general(context, node, input, scale, zero_point, torch_dtype, axis.val) +@register_torch_op( + torch_alias=[ + "quantized_decomposed::choose_qparams_per_token_asymmetric", + "quantized_decomposed.choose_qparams_per_token_asymmetric", + ] +) +def choose_qparams_per_token_asymmetric(context, node): + """PyTorch uses this op to calculate scale and zero_point on-the-fly for input data.""" + raise NotImplementedError("Dynamic activation quantization is not supported in Core ML.") + + def _dequantize_general( context, node, @@ -194,8 +217,10 @@ def _dequantize_general( def dequantize(context, node): if context.frontend == TorchFrontend.TORCHSCRIPT: context.quant_context.get_dequantized_var(node.inputs[0], node.name) - elif context.frontend == TorchFrontend.EXIR: - inputs = _get_inputs(context, node, min_expected={TorchFrontend.EXIR: 6}) + elif context.frontend in TORCH_EXPORT_BASED_FRONTENDS: + inputs = _get_inputs( + context, node, min_expected={TorchFrontend.TORCHEXPORT: 6, TorchFrontend.EXECUTORCH: 6} + ) num_inputs = len(inputs) if num_inputs == 6: input, scale, zero_point, qmin, qmax, _ = inputs @@ -206,10 +231,7 @@ def dequantize(context, node): raise ValueError(f"dequantize should have 6 or 7 inputs, but got {num_inputs}") _dequantize_general(context, node, input, scale, zero_point, axis, qmin, qmax) else: - raise ValueError( - "dequantize is supported only in TorchScript and EXIR frontends, " - f"but got {context.frontend}" - ) + raise ValueError(f"Invalid PyTorch frontend {context.frontend}") def _dequantized_weight(qweight, name: str = None): @@ -457,3 +479,234 @@ def quantized_embedding(context, node): # Changing the axis from 0 
is not an option in torch, so we don't expose it gather = mb.gather(x=dequant_weights, indices=indices, name=node.name) context.add(gather) + + +@register_torch_op( + torch_alias=[ + "quantized_decomposed::embedding_4bit", + "quantized_decomposed::embedding_4bit.dtype", + "quantized_decomposed.embedding_4bit", + "quantized_decomposed.embedding_4bit.dtype", + ] +) +def quantized_embedding_4bit(context, node): + """Lower the 4-bit quantized embedding op used in executorch.""" + inputs = _get_inputs(context, node, expected=[6, 7]) + weight = inputs[0].val + weight_scales = inputs[1].val + weight_zero_points = None + if inputs[2] is not None and inputs[2].val is not None: + weight_zero_points = inputs[2].val + weight_quant_min = inputs[3].val + weight_quant_max = inputs[4].val + indices = inputs[5] + + out_np_dtype = None + if len(inputs) > 6: + if isinstance(inputs[6].val, _torch.dtype): + out_np_dtype = NUM_TO_NUMPY_DTYPE[TORCH_DTYPE_TO_NUM[inputs[6].val]] + elif isinstance(inputs[6].val, (int, _np.generic)): + out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[6].val] + if out_np_dtype is not None: + weight_scales = weight_scales.astype(out_np_dtype) + + if weight_quant_min == 0 and weight_quant_max == 0: + # Executorch wrongly passes both weight_quant_min and weight_quant_max. We should set it to correct numbers. + signed = True + weight_quant_min = -8 + weight_quant_max = 7 + else: + signed = weight_quant_min < 0 + + quant_low = -8 if signed else 0 + quant_high = 7 if signed else 15 + quant_torch_dtype = _torch.int8 if signed else _torch.uint8 + if weight_quant_min != quant_low: + raise ValueError( + f"The weight_quant_min should be {quant_low} for 4-bit embedding, but got {weight_quant_min}." + ) + if weight_quant_max != quant_high: + raise ValueError( + f"The weight_quant_max should be {quant_high} for 4-bit embedding, but got {weight_quant_max}." + ) + + # Unpack the weight to the normal layout. + with _torch.no_grad(): + weight = _torch.from_numpy(weight) + # The original weight was packed by using 8-bit to represent two numbers, so we need to separate them. + help_move_bits = 2**4 + weight_even = weight.div(help_move_bits, rounding_mode="trunc") + weight_odd = weight.remainder(help_move_bits) + weight_unpacked = _torch.stack((weight_even, weight_odd), dim=-1) + weight = weight_unpacked.view(weight.shape[0], -1) + weight = weight.view(quant_torch_dtype).add(weight_quant_min).numpy() + + if not _np.logical_and(weight >= quant_low, weight <= quant_high).all(): + raise ValueError( + f"All elements in weight should be within 4-bit range ({quant_low} to {quant_high})." + ) + + quantized_np_dtype = types.nptype_from_builtin( + types.string_to_builtin("int4" if signed else "uint4") + ) + dequant_weight = _utils._construct_constexpr_dequant_op( + weight.astype(quantized_np_dtype), + weight_zero_points, + weight_scales, + axis=-1, + name=inputs[0].name, + ) + + gather = mb.gather(x=dequant_weight, indices=indices, name=node.name) + context.add(gather) + + +@register_torch_op +def _convert_weight_to_int4pack(context, node): + """Pack weight to int4pack format which will be fed into `_weight_int4pack_mm` op.""" + inputs = _get_inputs(context, node, expected=2) + x = inputs[0].val + inner_k_tiles = inputs[1].val + + if x is None or inner_k_tiles is None: + raise NotImplementedError( + "For `_convert_weight_to_int4pack` op, we only support static case, where x, " + "and inner_k_tiles are all known during compilation time." 
+ ) + + with _torch.no_grad(): + x_int4packed = _torch._convert_weight_to_int4pack( + _torch.from_numpy(x), inner_k_tiles + ).numpy() + + res = mb.const(val=x_int4packed, name=node.name) + context.add(res) + + +@register_torch_op +def _weight_int4pack_mm(context, node): + """ + The first argument is the same as torch.mm, but the second argument (weight) is packed. + The packed weight has rank=4, because the meta registration in dynamo requires operator has the same output shape + for each device. So it creates a fake shape {N / 8, K / (16 * innerKTiles), 32, innerKTiles / 2} for CPU. + + More specifically: + + # Original torch.mm + torch.mm(a, b) + + # The int4 packed version mm + b_uint8, b_scales_and_zeros = _group_quantize_tensor( + b, n_bit=4, q_group_size=q_group + ) + b_int4pack = torch._convert_weight_to_int4pack( + b_uint8, inner_k_tiles + ) + weight_int4pack_mm(a, b_int4pack, b_scales_and_zeros) + """ + if Version(_torch.__version__) < Version("2.4.0"): + raise AssertionError("To lower _weight_int4pack_mm, requires torch >= 2.4.0") + + logger.warning( + "The current conversion of `_weight_int4pack_mm` op only works with model produced by torchao. " + "If the op is produced by other libs, you may observe large numerical discrepancy." + ) + + if not _HAS_TORCHAO: + raise AssertionError( + f"{MSG_TORCHAO_NOT_FOUND}\n torchao is needed to convert torch blockwise quantized model." + ) + + inputs = _get_inputs(context, node, expected=4) + x = inputs[0] + y_int4pack = inputs[1].val + group_size = inputs[2].val + y_scales_and_zeros = inputs[3].val + + if y_int4pack is None or group_size is None or y_scales_and_zeros is None: + raise NotImplementedError( + "For `_weight_int4pack_mm` op, we only support static case, where y_int4pack, " + "group_size, y_scales_and_zeros are all known during compilation time." + ) + + if not (len(y_scales_and_zeros.shape) == 3 and y_scales_and_zeros.shape[2] == 2): + raise ValueError( + "The scales_and_zeros from torch should have 3 dims and last dim has size 2." + ) + scales = _np.transpose(y_scales_and_zeros[:, :, 0]) + zero_points = _np.transpose(y_scales_and_zeros[:, :, 1]) + + if _np.allclose(zero_points, zero_points.astype("int32")): + zero_points = zero_points.astype("int32") + else: + zero_points = zero_points.astype(_np.float32) + + # Unpack the result of `torch._convert_weight_to_int4pack` back to plain layout. + # TODO: Use `torchao.ops.unpack_tensor_core_tiled_layout` to unpack after it has CPU implementation. + # The current way to unpack by using _weight_int4pack_mm with eye matrix is a workaround on CPU. + if len(y_int4pack.shape) != 4: + raise ValueError( + f"The packed y from torch should have 4 dims, but got {len(y_int4pack.shape)}." 
+ ) + inner_k_tiles = y_int4pack.shape[-1] * 2 + y_unpacked_shape = (y_int4pack.shape[0] * 8, y_int4pack.shape[1] * (inner_k_tiles * 16)) + eye_shape = y_unpacked_shape[1] + quant_min = 0 + quant_max = 2**4 - 1 + with _torch.no_grad(): + y_dequantized = ( + _torch._weight_int4pack_mm( + _torch.eye(eye_shape, device=_torch.device("cpu"), dtype=_torch.float32), + _torch.from_numpy(y_int4pack), + group_size, + _torch.from_numpy(y_scales_and_zeros).float(), + ) + .t() + .contiguous() + ) + zero_point_domain = ( + torchao_quant.ZeroPointDomain.INT + if _np.issubdtype(zero_points.dtype, _np.integer) + else torchao_quant.ZeroPointDomain.FLOAT + ) + y_quantized = torchao_quant.quantize_affine( + y_dequantized, + (1, group_size), + _torch.from_numpy(scales), + _torch.from_numpy(zero_points), + _torch.int32, + quant_min=quant_min, + quant_max=quant_max, + zero_point_domain=zero_point_domain, + ) + y_quantized = y_quantized.numpy().astype(_np.uint8) + if len(y_quantized.shape) != 2: + raise ValueError( + f"The unpacked quantized y should have 2 dims, but got {len(y_quantized.shape)}." + ) + if not _np.logical_and(y_quantized >= 0, y_quantized <= 15).all(): + raise ValueError("All elements should be within 4-bit range (0 to 15).") + + # If zero_point_domain in `quantize_affine` is set to `ZeroPointDomain.INT`, it matches with CoreML implementation: + # quant = torch.clamp(torch.round(input * (1.0 / scale)) + zero_point, quant_min, quant_max) + # However, for `ZeroPointDomain.FLOAT`, torchao did following transformations to make it compatible with `tinygemm`: + # mid_point = (quant_max + quant_min + 1) / 2 + # min_val = zero_point - scale * mid_point + # quant = torch.clamp(torch.round((input - min_val) / scale), quant_min, quant_max)) + # As we want to make sure the quantize matches CoreML dequant op, we have to do following transformations: + # dequant = (quant - mid_point) * scale + zp + # so we can re-write the expression as + # dequant = (quant - (mid_point - zp / scale)) * scale + # which means the zero_point in CoreML is actually `mid_point - zp / scale`. + if not _np.issubdtype(zero_points.dtype, _np.integer): + mid_point = (quant_max + quant_min + 1) / 2 + zero_points = mid_point - zero_points / scales + + # Use MIL constexpr op to represent the quantization. 
+ quantized_np_dtype = types.nptype_from_builtin(types.string_to_builtin("uint4")) + dequant_weights = _utils._construct_constexpr_dequant_op( + y_quantized.astype(quantized_np_dtype), zero_points, scales, axis=-1, name=inputs[1].name + ) + + res = mb.linear(x=x, weight=dequant_weights, name=node.name) + context.add(res) diff --git a/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py b/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py index 9331bacf9..f16663864 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py +++ b/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py @@ -1412,7 +1412,8 @@ def test_max_pool2d( ceil_mode, ], "max_pool2d", - ops.max_pool2d, + # Using ops.max_pool1d because max_pool2d is its alias + ops.max_pool1d, expected_result, ) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py b/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py index d6845aae1..f76d89734 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py @@ -12,6 +12,7 @@ import numpy as np import pytest +from packaging.version import Version from PIL import Image import coremltools as ct @@ -19,8 +20,10 @@ _HAS_EXECUTORCH, _HAS_HF, _HAS_TORCH, + _HAS_TORCHAO, MSG_EXECUTORCH_NOT_FOUND, MSG_TORCH_NOT_FOUND, + MSG_TORCHAO_NOT_FOUND, ) from coremltools.converters.mil.frontend.torch.test.testing_utils import _copy_input_data from coremltools.converters.mil.frontend.torch.torch_op_registry import ( @@ -59,6 +62,9 @@ if _HAS_EXECUTORCH: import executorch.exir +if _HAS_TORCHAO: + from torchao.quantization import quant_api + from torchao.utils import unwrap_tensor_subclass @pytest.fixture def torch_model(): @@ -2842,3 +2848,111 @@ def test_iO16_default_fp32_io(self, float32_input_model_add_op): output_dtype="fp32", expected_op_list=["add"], ) + + +@pytest.mark.skipif( + Version(torch.__version__) < Version("2.4.0"), + reason="Most torchao functionalities only work with PyTorch 2.4.0+", +) +@pytest.mark.skipif( + ct.utils._macos_version() < (15, 0), + reason="Torchao block-wise quantization requires MacOS 15+.", +) +@pytest.mark.skipif(not _HAS_TORCHAO, reason=MSG_TORCHAO_NOT_FOUND) +class TestTorchao: + """ + This class tests the torchao quantized model conversion. + """ + + @staticmethod + def _construct_test_model(): + # The old Quantizer method in torchao doesn't work with a single-layer model such as model=nn.Linear(...), + # so we have to create a Module which contains linear layers. + class TestModel(nn.Module): + def __init__(self): + super(TestModel, self).__init__() + # Currently torchao only supports Linear module without bias. 
+ self.linear1 = nn.Linear(32, 64, bias=False) + self.linear2 = nn.Linear(64, 32, bias=False) + self.relu = nn.ReLU() + + def forward(self, x): + x = self.relu(self.linear1(x)) + return self.relu(self.linear2(x)) + + return TestModel().to(torch.device("cpu")).eval() + + @pytest.mark.parametrize("use_export", (False, True)) + def test_weight_only_quantization(self, use_export): + model = self._construct_test_model() + quantizer = quant_api.Int4WeightOnlyQuantizer( + precision=torch.float32, groupsize=32, inner_k_tiles=2, device=torch.device("cpu") + ) + model = quantizer.quantize(model) + input_data = torch.randn((2, 32), dtype=torch.float16) + + if use_export: + exported_model = torch.export.export(model, (input_data,)) + inputs = None + else: + exported_model = torch.jit.trace(model, example_inputs=(input_data,)) + inputs = [ct.TensorType(shape=input_data.shape, name="input")] + + converted_model = ct.convert( + exported_model, inputs=inputs, minimum_deployment_target=ct.target.iOS18 + ) + main_func = converted_model._mil_program.functions["main"] + quantize_ops = main_func.find_ops(op_type="constexpr_blockwise_shift_scale") + assert len(quantize_ops) > 0 + + if ct.utils._is_macos(): + result = converted_model.predict( + { + list(converted_model.input_description)[0]: input_data.detach() + .numpy() + .astype(np.float32) + } + ) + expected = model(input_data) + output_name = list(result.keys())[0] + np.testing.assert_allclose(result[output_name], expected.detach().numpy(), atol=1e-3) + + def test_weight_only_quantization_bfloat16_not_support(self): + """ + Torchao quant_api.int4_weight_only only supports bfloat16. + """ + model = self._construct_test_model().bfloat16() + quant_api.quantize_(model, quant_api.int4_weight_only(group_size=32, inner_k_tiles=2)) + model = unwrap_tensor_subclass(model) + input_data = torch.randn((2, 32), dtype=torch.float16) + exported_model = torch.export.export(model, (input_data,)) + # The conversion of bfloat16 hasn't been supported yet. + with pytest.raises(KeyError, match="torch.bfloat16"): + ct.convert(exported_model, minimum_deployment_target=ct.target.iOS17) + + @pytest.mark.parametrize("use_export", (True, False)) + def test_dynamic_activation_quantization_not_support(self, use_export): + """ + Although Int8DynActInt4WeightQuantizer will be deprecated, we still want + to test it because it's used in ExecuTorch to quantize llama models. 
+ """ + model = self._construct_test_model() + quantizer = quant_api.Int8DynActInt4WeightQuantizer( + precision=torch.float16, groupsize=32, device=torch.device("cpu") + ) + model = quantizer.quantize(model) + input_data = torch.randn((2, 32), dtype=torch.float16) + + if use_export: + exported_model = torch.export.export(model, (input_data,)) + inputs = None + err_msg = "Unsupported fx node quantize_per_token" + err_type = ValueError + else: + exported_model = torch.jit.trace(model, example_inputs=(input_data,)) + inputs = [ct.TensorType(shape=input_data.shape)] + err_msg = "Dynamic activation quantization is not supported in Core ML" + err_type = NotImplementedError + + with pytest.raises(err_type, match=err_msg): + ct.convert(exported_model, inputs=inputs, minimum_deployment_target=ct.target.iOS17) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_export_conversion_api.py b/coremltools/converters/mil/frontend/torch/test/test_torch_export_conversion_api.py index 9d9896bfa..eeb05745f 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_export_conversion_api.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_export_conversion_api.py @@ -12,27 +12,155 @@ if not _HAS_TORCH_EXPORT_API: pytest.skip(allow_module_level=True, reason="torch.export is required") -USE_EDGE_DIALECT = [False] +from coremltools.converters.mil.frontend.torch.exir_utils import WRAPPED_SCALAR_INPUT_SUFFIX +from coremltools.converters.mil.frontend.torch.utils import TorchFrontend + +frontends = [TorchFrontend.TORCHEXPORT] + if _HAS_EXECUTORCH: - USE_EDGE_DIALECT.append(True) + import executorch.exir + + frontends.append(TorchFrontend.EXECUTORCH) import torch +import coremltools as ct from coremltools.converters.mil import testing_reqs from coremltools.converters.mil.mil.scope import ScopeSource -from .testing_utils import TorchBaseTest, TorchFrontend +from .testing_utils import TorchBaseTest backends = testing_reqs.backends compute_units = testing_reqs.compute_units +TORCH_EXPORT_DEFAULT_LOWER_BOUND = {TorchFrontend.TORCHEXPORT: 2, TorchFrontend.EXECUTORCH: 2} +if torch.__version__ >= "2.4.0": + TORCH_EXPORT_DEFAULT_LOWER_BOUND[TorchFrontend.TORCHEXPORT] = 0 + class TestTorchExportConversionAPI(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, use_edge_dialect, dynamic", - itertools.product(compute_units, backends, USE_EDGE_DIALECT, (True, False)), + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, frontends), + ) + def test_scalar_input(self, compute_unit, backend, frontend): + class Model(torch.nn.Module): + def forward(self, x): + return x + 1 + + model = Model() + model.eval() + + mlmodel = self.run_compare_torch( + (), + model, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + )[1] + main_function = mlmodel._mil_program.functions["main"] + + assert len(main_function.inputs) == 1 + input_name = list(main_function.inputs.keys())[0] + input_var = main_function.inputs[input_name] + assert input_name.endswith(WRAPPED_SCALAR_INPUT_SUFFIX) + assert input_var.shape == (1,) + + squeeze_op = main_function.find_ops(op_type="squeeze")[0] + if backend[1] == "fp32": + assert squeeze_op.x is input_var + elif backend[1] == "fp16": + cast_op = main_function.find_ops(op_type="cast")[0] + assert cast_op.x is input_var + assert cast_op.dtype.val == "fp16" + assert squeeze_op.x is cast_op.outputs[0] + + @pytest.mark.parametrize( + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, 
frontends), + ) + def test_dynamic_input(self, compute_unit, backend, frontend): + if ct.utils._macos_version() <= (14, 2): + pytest.xfail("rdar://135925921 ([CI] Upgrade External CI Machine OS)") + + class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = torch.nn.Linear(3, 5) + + def forward(self, x): + return self.linear(x) + + model = Model() + model.eval() + + batch_dim = torch.export.Dim("batch_dim") + dynamic_shapes = {"x": {0: batch_dim}} + + coreml_model = self.run_compare_torch( + (2, 3), + model, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + torch_export_dynamic_shapes=dynamic_shapes, + )[1] + + input_proto = coreml_model.input_description._fd_spec[0] + size_ranges = input_proto.type.multiArrayType.shapeRange.sizeRanges + assert size_ranges[0].lowerBound == TORCH_EXPORT_DEFAULT_LOWER_BOUND[frontend] + assert size_ranges[0].upperBound == 2147483647 + assert size_ranges[1].lowerBound == 3 + assert size_ranges[1].upperBound == 3 + + @pytest.mark.parametrize("frontend, dynamic", itertools.product(frontends, (True, False))) + def test_invalid_inputs(self, frontend, dynamic): + class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = torch.nn.Linear(3, 5) + + def forward(self, x): + return self.linear(x) + + model = Model() + model.eval() + + example_inputs = (torch.rand(2, 3),) + + dynamic_shapes = None + if dynamic: + batch_dim = torch.export.Dim("batch_dim") + dynamic_shapes = {"x": {0: batch_dim}} + + exported_program = torch.export.export( + model, + example_inputs, + dynamic_shapes=dynamic_shapes, + ) + if frontend == TorchFrontend.EXECUTORCH: + exported_program = executorch.exir.to_edge(exported_program).exported_program() + + with pytest.raises( + AssertionError, match=r"'inputs' argument should be None for ExportedProgram" + ): + inputs = [ct.TensorType(shape=(2, 3))] + if dynamic: + batch_dim = ct.RangeDim(lower_bound=1, upper_bound=128) + shape = (batch_dim, 3) + inputs = [ct.TensorType(shape=shape)] + ct.convert(exported_program, inputs=inputs) + + +class TestExecuTorchExamples(TorchBaseTest): + @pytest.mark.parametrize( + "compute_unit, backend, frontend, dynamic", + itertools.product(compute_units, backends, frontends, (True, False)), ) - def test_mul(self, compute_unit, backend, use_edge_dialect, dynamic): + def test_mul(self, compute_unit, backend, frontend, dynamic): + if ct.utils._macos_version() <= (14, 2) and dynamic: + pytest.xfail("rdar://135925921 ([CI] Upgrade External CI Machine OS)") + class MulModule(torch.nn.Module): def forward(self, input, other): return input * other @@ -46,15 +174,24 @@ def forward(self, input, other): "other": {0: dim0, 1: dim1}, } - _, coreml_model, _, _, _, _ = self.run_compare_torch( + coreml_model = self.run_compare_torch( [(3, 2), (3, 2)], MulModule(), compute_unit=compute_unit, backend=backend, - frontend=TorchFrontend.EXIR, - use_edge_dialect=use_edge_dialect, + frontend=frontend, torch_export_dynamic_shapes=dynamic_shapes, - ) + )[1] + + if dynamic: + for input_proto in coreml_model.input_description._fd_spec: + size_ranges = input_proto.type.multiArrayType.shapeRange.sizeRanges + assert size_ranges[0].lowerBound == TORCH_EXPORT_DEFAULT_LOWER_BOUND[frontend] + assert size_ranges[0].upperBound == 2147483647 + assert size_ranges[1].lowerBound == max( + 1, TORCH_EXPORT_DEFAULT_LOWER_BOUND[frontend] + ) + assert size_ranges[1].upperBound == 3 mil_program = coreml_model._mil_program mul = mil_program.functions["main"].find_ops(op_type="mul")[0] 
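
For readers of these tests, a minimal usage sketch of the torch.export conversion path they exercise; the module and dim names here are made up, and iOS18 is just one valid deployment target:

    import torch
    import coremltools as ct

    class MulModule(torch.nn.Module):
        def forward(self, x, y):
            return x * y

    example_args = (torch.rand(3, 2), torch.rand(3, 2))
    dim0 = torch.export.Dim("dim0")
    exported = torch.export.export(
        MulModule().eval(),
        example_args,
        dynamic_shapes={"x": {0: dim0}, "y": {0: dim0}},
    )
    # As test_invalid_inputs above asserts, `inputs` must be left as None when converting
    # an ExportedProgram; shapes and dynamic ranges come from the program itself.
    mlmodel = ct.convert(exported, minimum_deployment_target=ct.target.iOS18)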
@@ -62,7 +199,7 @@ def forward(self, input, other): stack_trace = mul.scopes[ScopeSource.EXIR_STACK_TRACE][0] assert stack_trace.split("\n")[-2].strip() == "return input * other" - if use_edge_dialect: + if frontend == TorchFrontend.EXECUTORCH: debug_handle = mul.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] assert isinstance(debug_handle, int) @@ -101,10 +238,13 @@ def forward(self, input, other): assert ops[index_cast][-1]["Operator"] == "cast" @pytest.mark.parametrize( - "compute_unit, backend, use_edge_dialect, dynamic", - itertools.product(compute_units, backends, USE_EDGE_DIALECT, (True, False)), + "compute_unit, backend, frontend, dynamic", + itertools.product(compute_units, backends, frontends, (True, False)), ) - def test_linear(self, compute_unit, backend, use_edge_dialect, dynamic): + def test_linear(self, compute_unit, backend, frontend, dynamic): + if ct.utils._macos_version() <= (14, 2) and dynamic: + pytest.xfail("rdar://135925921 ([CI] Upgrade External CI Machine OS)") + class LinearModule(torch.nn.Module): def __init__(self): super().__init__() @@ -118,15 +258,22 @@ def forward(self, arg): batch_dim = torch.export.Dim("batch_dim") dynamic_shapes = {"arg": {0: batch_dim}} - _, coreml_model, _, _, _, _ = self.run_compare_torch( + coreml_model = self.run_compare_torch( [(3, 3)], LinearModule(), compute_unit=compute_unit, backend=backend, - frontend=TorchFrontend.EXIR, - use_edge_dialect=use_edge_dialect, + frontend=frontend, torch_export_dynamic_shapes=dynamic_shapes, - ) + )[1] + + if dynamic: + input_proto = coreml_model.input_description._fd_spec[0] + size_ranges = input_proto.type.multiArrayType.shapeRange.sizeRanges + assert size_ranges[0].lowerBound == TORCH_EXPORT_DEFAULT_LOWER_BOUND[frontend] + assert size_ranges[0].upperBound == 2147483647 + assert size_ranges[1].lowerBound == 3 + assert size_ranges[1].upperBound == 3 mil_program = coreml_model._mil_program linear = mil_program.functions["main"].find_ops(op_type="linear")[0] @@ -134,7 +281,7 @@ def forward(self, arg): stack_trace = linear.scopes[ScopeSource.EXIR_STACK_TRACE][0] assert stack_trace.split("\n")[-2].strip() == "return self.linear(arg)" - if use_edge_dialect: + if frontend == TorchFrontend.EXECUTORCH: debug_handle = linear.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] assert isinstance(debug_handle, int) @@ -174,10 +321,10 @@ def forward(self, arg): assert ops[index_cast][-1]["Operator"] == "cast" @pytest.mark.parametrize( - "compute_unit, backend, use_edge_dialect, dynamic", - itertools.product(compute_units, backends, USE_EDGE_DIALECT, (True, False)), + "compute_unit, backend, frontend, dynamic", + itertools.product(compute_units, backends, frontends, (True, False)), ) - def test_add(self, compute_unit, backend, use_edge_dialect, dynamic): + def test_add(self, compute_unit, backend, frontend, dynamic): if dynamic: pytest.skip( "https://github.com/apple/coremltools/issues/2307 " @@ -197,15 +344,20 @@ def forward(self, x, y): dim0 = torch.export.Dim("dim0", min=1) dynamic_shapes = {"x": {0: dim0}, "y": {0: dim0}} - _, coreml_model, _, _, _, _ = self.run_compare_torch( + coreml_model = self.run_compare_torch( [(1,), (1,)], AddModule(), compute_unit=compute_unit, backend=backend, - frontend=TorchFrontend.EXIR, - use_edge_dialect=use_edge_dialect, + frontend=frontend, torch_export_dynamic_shapes=dynamic_shapes, - ) + )[1] + + if dynamic: + for input_proto in coreml_model.input_description._fd_spec: + size_ranges = input_proto.type.multiArrayType.shapeRange.sizeRanges + assert size_ranges[0].lowerBound == 1 + assert 
size_ranges[0].upperBound == 2147483647 mil_program = coreml_model._mil_program adds = mil_program.functions["main"].find_ops(op_type="add") @@ -220,7 +372,7 @@ def forward(self, x, y): for i, stack_trace in enumerate(stack_traces): assert stack_trace.split("\n")[-2].strip() == source_codes[i] - if use_edge_dialect: + if frontend == TorchFrontend.EXECUTORCH: debug_handles = [add.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] for add in adds] for debug_handle in debug_handles: assert isinstance(debug_handle, int) @@ -268,10 +420,13 @@ def forward(self, x, y): assert ops[index_cast][-1]["Operator"] == "cast" @pytest.mark.parametrize( - "compute_unit, backend, use_edge_dialect, dynamic", - itertools.product(compute_units, backends, USE_EDGE_DIALECT, (True, False)), + "compute_unit, backend, frontend, dynamic", + itertools.product(compute_units, backends, frontends, (True, False)), ) - def test_add_mul(self, compute_unit, backend, use_edge_dialect, dynamic): + def test_add_mul(self, compute_unit, backend, frontend, dynamic): + if ct.utils._macos_version() <= (14, 2) and dynamic: + pytest.xfail("rdar://135925921 ([CI] Upgrade External CI Machine OS)") + class AddMulModule(torch.nn.Module): def forward(self, a, x, b): y = torch.mm(a, x) @@ -287,15 +442,34 @@ def forward(self, a, x, b): "b": {}, } - _, coreml_model, _, _, _, _ = self.run_compare_torch( + coreml_model = self.run_compare_torch( [(2, 2), (2, 2), (2, 2)], AddMulModule(), compute_unit=compute_unit, backend=backend, - frontend=TorchFrontend.EXIR, - use_edge_dialect=use_edge_dialect, + frontend=frontend, torch_export_dynamic_shapes=dynamic_shapes, - ) + )[1] + + if dynamic: + for i, input_proto in enumerate(coreml_model.input_description._fd_spec): + multi_array_type = input_proto.type.multiArrayType + shape = multi_array_type.shape + size_ranges = multi_array_type.shapeRange.sizeRanges + if i == 0: + assert size_ranges[0].lowerBound == 2 + assert size_ranges[0].upperBound == 2 + assert size_ranges[1].lowerBound == TORCH_EXPORT_DEFAULT_LOWER_BOUND[frontend] + assert size_ranges[1].upperBound == 2147483647 + elif i == 1: + assert size_ranges[0].lowerBound == TORCH_EXPORT_DEFAULT_LOWER_BOUND[frontend] + assert size_ranges[0].upperBound == 2147483647 + assert size_ranges[1].lowerBound == 2 + assert size_ranges[1].upperBound == 2 + else: + assert i == 2 + assert shape == [2, 2] + assert len(size_ranges) == 0 mil_program = coreml_model._mil_program matmul_or_add = {} @@ -314,7 +488,7 @@ def forward(self, a, x, b): source_code = source_codes[op_type] assert stack_trace.split("\n")[-2].strip() == source_code - if use_edge_dialect: + if frontend == TorchFrontend.EXECUTORCH: debug_handle = { k: v.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] for k, v in matmul_or_add.items() } @@ -364,10 +538,13 @@ def forward(self, a, x, b): assert ops[op_type][index_cast][-1]["Operator"] == "cast" @pytest.mark.parametrize( - "compute_unit, backend, use_edge_dialect, dynamic", - itertools.product(compute_units, backends, USE_EDGE_DIALECT, (True, False)), + "compute_unit, backend, frontend, dynamic", + itertools.product(compute_units, backends, frontends, (True, False)), ) - def test_softmax(self, compute_unit, backend, use_edge_dialect, dynamic): + def test_softmax(self, compute_unit, backend, frontend, dynamic): + if ct.utils._macos_version() <= (14, 2) and dynamic: + pytest.xfail("rdar://135925921 ([CI] Upgrade External CI Machine OS)") + class SoftmaxModule(torch.nn.Module): def __init__(self): super().__init__() @@ -381,15 +558,22 @@ def forward(self, x): vocab_dim = 
torch.export.Dim("vocab_dim") dynamic_shapes = {"x": {0: vocab_dim}} - _, coreml_model, _, _, _, _ = self.run_compare_torch( + coreml_model = self.run_compare_torch( [(2, 2)], SoftmaxModule(), compute_unit=compute_unit, backend=backend, - frontend=TorchFrontend.EXIR, - use_edge_dialect=use_edge_dialect, + frontend=frontend, torch_export_dynamic_shapes=dynamic_shapes, - ) + )[1] + + if dynamic: + input_proto = coreml_model.input_description._fd_spec[0] + size_ranges = input_proto.type.multiArrayType.shapeRange.sizeRanges + assert size_ranges[0].lowerBound == TORCH_EXPORT_DEFAULT_LOWER_BOUND[frontend] + assert size_ranges[0].upperBound == 2147483647 + assert size_ranges[1].lowerBound == 2 + assert size_ranges[1].upperBound == 2 mil_program = coreml_model._mil_program softmax = mil_program.functions["main"].find_ops(op_type="softmax")[0] @@ -397,7 +581,7 @@ def forward(self, x): stack_trace = softmax.scopes[ScopeSource.EXIR_STACK_TRACE][0] assert stack_trace.split("\n")[-2].strip() == "return self.softmax(x)" - if use_edge_dialect: + if frontend == TorchFrontend.EXECUTORCH: debug_handle = softmax.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] assert isinstance(debug_handle, int) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_export_quantization.py b/coremltools/converters/mil/frontend/torch/test/test_torch_export_quantization.py index 878bb25dc..031095dd0 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_export_quantization.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_export_quantization.py @@ -13,9 +13,12 @@ if not _HAS_TORCH_EXPORT_API: pytest.skip(allow_module_level=True, reason="torch.export is required") -USE_EDGE_DIALECT = [False] +from coremltools.converters.mil.frontend.torch.utils import TorchFrontend + +frontends = [TorchFrontend.TORCHEXPORT] + if _HAS_EXECUTORCH: - USE_EDGE_DIALECT.append(True) + frontends.append(TorchFrontend.EXECUTORCH) import torch @@ -42,7 +45,7 @@ QuantizationScheme, ) -from .testing_utils import TorchBaseTest, TorchFrontend +from .testing_utils import TorchBaseTest class TestTorchExportQuantization(TorchBaseTest): @@ -108,18 +111,16 @@ def make_torch_quantized_graph( return converted_graph @pytest.mark.parametrize( - "quantizer_name, quantization_type, is_per_channel, nbit, use_edge_dialect", + "quantizer_name, quantization_type, is_per_channel, nbit, frontend", itertools.product( ("XNNPack", "CoreML"), ("PTQ", "QAT"), (True, False), (4, 8), - USE_EDGE_DIALECT, + frontends, ), ) - def test_conv_relu( - self, quantizer_name, quantization_type, is_per_channel, nbit, use_edge_dialect - ): + def test_conv_relu(self, quantizer_name, quantization_type, is_per_channel, nbit, frontend): SHAPE = (1, 3, 256, 256) class Model(torch.nn.Module): @@ -152,8 +153,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: _, mlmodel, _, _, _, _ = self.run_compare_torch( SHAPE, converted_graph, - frontend=TorchFrontend.EXIR, - use_edge_dialect=use_edge_dialect, + frontend=frontend, backend=("mlprogram", "fp16"), minimum_deployment_target=minimum_deployment_target, ) @@ -173,18 +173,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: assert constexpr_affine_dequantize_op.quantized_data.dtype in (types.int8, types.uint8) @pytest.mark.parametrize( - "quantizer_name, quantization_type, is_per_channel, nbit, use_edge_dialect", + "quantizer_name, quantization_type, is_per_channel, nbit, frontend", itertools.product( ("XNNPack", "CoreML"), ("PTQ", "QAT"), (True, False), (4, 8), - USE_EDGE_DIALECT, + frontends, ), ) - 
def test_linear( - self, quantizer_name, quantization_type, is_per_channel, nbit, use_edge_dialect - ): + def test_linear(self, quantizer_name, quantization_type, is_per_channel, nbit, frontend): SHAPE = (1, 5) class Model(torch.nn.Module): @@ -213,8 +211,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: _, mlmodel, _, _, _, _ = self.run_compare_torch( SHAPE, converted_graph, - frontend=TorchFrontend.EXIR, - use_edge_dialect=use_edge_dialect, + frontend=frontend, backend=("mlprogram", "fp16"), minimum_deployment_target=minimum_deployment_target, ) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py index 673ae47b3..aa9b627cc 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py @@ -10,20 +10,19 @@ import numpy as np import pytest + +torch = pytest.importorskip("torch") import torch.nn as nn import coremltools as ct from coremltools import RangeDim, Shape, TensorType -from coremltools._deps import ( - _HAS_EXECUTORCH, - _HAS_TORCH_AUDIO, - _HAS_TORCH_VISION, - version_lt, -) +from coremltools._deps import _HAS_TORCH_AUDIO, _HAS_TORCH_VISION, version_lt from coremltools.converters.mil import testing_reqs from coremltools.converters.mil.frontend.torch.utils import ( NUM_TO_TORCH_DTYPE, NUMPY_DTYPE_TO_TORCH_NUM, + TORCH_EXPORT_BASED_FRONTENDS, + TorchFrontend, ) from coremltools.converters.mil.mil import Operation, Program, types from coremltools.converters.mil.mil.var import Var @@ -38,8 +37,9 @@ from .testing_utils import ( ModuleWrapper, TorchBaseTest, - TorchFrontend, contains_op, + export_torch_model_to_frontend, + frontends, generate_input_data, ) @@ -50,13 +50,15 @@ import torchvision -frontends = [TorchFrontend.TORCHSCRIPT] - -if _HAS_EXECUTORCH: - frontends.append(TorchFrontend.EXIR) - backends = testing_reqs.backends compute_units = testing_reqs.compute_units +for frontend in frontends: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + # torch.export limits the number of compilation frames to prevent infinite loop + # However, those frames are not immediately released after torch.export is done, + # so when we have many torch.export calls, we can still hit the frame number limit + torch._dynamo.config.accumulated_cache_size_limit = 1000000 + break torch = pytest.importorskip("torch") torch.manual_seed(30) @@ -69,7 +71,6 @@ class TestScriptedModels(TorchBaseTest): - @staticmethod def get_while_loop_model(): class TestLayer(nn.Module): @@ -107,35 +108,29 @@ def forward(self, x): def test_while_loop(self, compute_unit, backend): model = TestScriptedModels.get_while_loop_model() self.run_compare_torch( - model.input_size, - model, - backend=backend, - compute_unit=compute_unit, - use_scripting=True + model.input_size, model, backend=backend, compute_unit=compute_unit, use_scripting=True ) - @pytest.mark.parametrize( - "compute_unit, backend", itertools.product(compute_units, backends) - ) + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) def test_cond(self, compute_unit, backend): torch_model = TestScriptedModels.get_cond_model() self.run_compare_torch( - torch.tensor([1.]), + torch.tensor([1.0]), torch_model, input_as_shape=False, backend=backend, compute_unit=compute_unit, - use_scripting=True + use_scripting=True, ) self.run_compare_torch( - torch.tensor([11.]), + torch.tensor([11.0]), torch_model, input_as_shape=False, backend=backend, 
compute_unit=compute_unit, - use_scripting=True + use_scripting=True, ) @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) @@ -161,11 +156,7 @@ def forward(self, x): model = TestNet().eval() self.run_compare_torch( - model.input_size, - model, - backend=backend, - compute_unit=compute_unit, - use_scripting=True + model.input_size, model, backend=backend, compute_unit=compute_unit, use_scripting=True ) @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) @@ -195,11 +186,7 @@ def forward(self, x): model = TestNet().eval() self.run_compare_torch( - model.input_size, - model, - backend=backend, - compute_unit=compute_unit, - use_scripting=True + model.input_size, model, backend=backend, compute_unit=compute_unit, use_scripting=True ) @pytest.mark.parametrize( @@ -252,9 +239,30 @@ def test_conv(self, compute_unit, backend): class TestMean(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", itertools.product(compute_units, backends) + "compute_unit, backend, frontend, keepdim", + itertools.product(compute_units, backends, frontends, (True, False)), + ) + def test_mean(self, compute_unit, backend, frontend, keepdim): + class Model(nn.Module): + def forward(self, x): + return torch.mean(x, dim=(2, 3), keepdim=keepdim) + + model = Model() + shape = (1, 3, 256, 256) + + self.run_compare_torch( + shape, + model, + frontend=frontend, + backend=backend, + compute_unit=compute_unit, + ) + + @pytest.mark.parametrize( + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, frontends), ) - def test_with_flexible_shape(self, compute_unit, backend): + def test_with_flexible_shape(self, compute_unit, backend, frontend): if backend[0] == "mlprogram" and _macos_version() < (13, 0): pytest.xfail( "Issue fixed in iOS16/macOS13: https://github.com/apple/coremltools/issues/1420" @@ -284,15 +292,14 @@ def forward(self, x): self.run_compare_torch( shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, converter_input_type=converter_input_type, ) @staticmethod - @pytest.mark.skipif( - ct.utils._macos_version() < (13, 0), reason="Bug fixed in macOS13/iOS16" - ) + @pytest.mark.skipif(ct.utils._macos_version() < (13, 0), reason="Bug fixed in macOS13/iOS16") def test_flexible_shape_with_default_value(): # test for bug reported in https://github.com/apple/coremltools/issues/1420 class Network(torch.nn.Module): @@ -443,18 +450,20 @@ def test( class TestFrac(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape", + "compute_unit, backend, frontend, shape", itertools.product( compute_units, backends, + frontends, COMMON_SHAPES, ), ) - def test_frac(self, compute_unit, backend, shape): + def test_frac(self, compute_unit, backend, frontend, shape): model = ModuleWrapper(function=torch.frac) TorchBaseTest.run_compare_torch( shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, rand_range=(-10.0, 10.0), @@ -543,9 +552,7 @@ class TestSort(TorchBaseTest): ), ) def test_sort(self, compute_unit, backend, shape, axis, descending): - model = ModuleWrapper( - function=torch.sort, kwargs={"dim": axis, "descending": descending} - ) + model = ModuleWrapper(function=torch.sort, kwargs={"dim": axis, "descending": descending}) TorchBaseTest.run_compare_torch( shape, model, @@ -655,15 +662,16 @@ def test_dot(self, compute_unit, backend, vector_length): class TestOuter(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, x_vector_length, 
y_vector_length", + "compute_unit, backend, frontend, x_vector_length, y_vector_length", itertools.product( compute_units, backends, + frontends, [1, 5], [1, 3], ), ) - def test_outer(self, compute_unit, backend, x_vector_length, y_vector_length): + def test_outer(self, compute_unit, backend, frontend, x_vector_length, y_vector_length): model = ModuleWrapper(function=torch.outer) vector1 = generate_input_data((x_vector_length,)) @@ -672,6 +680,7 @@ def test_outer(self, compute_unit, backend, x_vector_length, y_vector_length): TorchBaseTest.run_compare_torch( (vector1, vector2), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -732,9 +741,7 @@ class TestNorms(TorchBaseTest): def test_frobenius_norm(self, compute_unit, backend, shape, keepdim): num_dims = len(shape) for dim in range(-num_dims, num_dims): - model = ModuleWrapper( - function=torch.norm, kwargs={"keepdim": keepdim, "dim": dim} - ) + model = ModuleWrapper(function=torch.norm, kwargs={"keepdim": keepdim, "dim": dim}) TorchBaseTest.run_compare_torch( shape, model, @@ -768,15 +775,15 @@ def test_number_norm(self, compute_unit, backend, shape, p, keepdim): class TestNarrow(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape", + "compute_unit, backend, frontend, shape", itertools.product( compute_units, backends, + frontends, COMMON_SHAPES, ), ) - def test_narrow(self, compute_unit, backend, shape): - + def test_narrow(self, compute_unit, backend, frontend, shape): class Model(torch.nn.Module): def __init__(self, dim, start, length): super().__init__() @@ -787,9 +794,8 @@ def __init__(self, dim, start, length): def forward(self, x): return torch.narrow(x, self.dim, self.start, self.length) - for cur_dim in range(len(shape)): - for cur_start in range(shape[cur_dim]-1): + for cur_start in range(shape[cur_dim] - 1): for cur_length in range(1, shape[cur_dim] - cur_start): m = Model(cur_dim, cur_start, cur_length) @@ -797,6 +803,7 @@ def forward(self, x): TorchBaseTest.run_compare_torch( shape, m, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -879,11 +886,7 @@ def _is_valid_config(self, shape, order, dim): if order is not None: if len(shape) > 2: return False - elif ( - len(shape) == 2 - and not isinstance(order, str) - and (order == 0 or order > 2) - ): + elif len(shape) == 2 and not isinstance(order, str) and (order == 0 or order > 2): return False elif len(shape) == 1 and isinstance(order, str): return False @@ -993,18 +996,20 @@ def test_longer_range_input_element_values(self): TorchBaseTest.run_compare_torch(x, model, input_as_shape=False) @pytest.mark.parametrize( - "compute_unit, backend, shape", + "compute_unit, backend, frontend, shape", itertools.product( compute_units, backends, + frontends, COMMON_SHAPES, ), ) - def test_additional_shapes_and_backends(self, compute_unit, backend, shape): + def test_additional_shapes_and_backends(self, compute_unit, backend, frontend, shape): model = TestHardswish.HardswishModel() TorchBaseTest.run_compare_torch( shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -1013,9 +1018,7 @@ def test_additional_shapes_and_backends(self, compute_unit, backend, shape): class TestBatchNorm(TorchBaseTest): @pytest.mark.parametrize( "compute_unit, backend, num_features, eps, affine", - itertools.product( - compute_units, backends, [5, 3, 1], [0.1, 1e-05], [True, False] - ), + itertools.product(compute_units, backends, [5, 3, 1], [0.1, 1e-05], [True, False]), ) def test_batchnorm(self, 
compute_unit, backend, num_features, eps, affine): model = nn.BatchNorm2d(num_features, eps, affine=affine) @@ -1061,9 +1064,7 @@ def forward(self, x): ["None", "Batch", "Height", "Width", "Depth", "All"], ), ) - def test_batchnorm_3d( - self, compute_unit, backend, num_features, eps, affine, dynamic_input - ): + def test_batchnorm_3d(self, compute_unit, backend, num_features, eps, affine, dynamic_input): model = nn.BatchNorm3d(num_features, eps, affine=affine) input_shape = (6, num_features, 2, 3, 4) if dynamic_input == "None": @@ -1076,27 +1077,19 @@ def test_batchnorm_3d( else: if dynamic_input == "Batch": converter_input_type = [ - TensorType( - shape=(RangeDim(1, 10), num_features, 2, 3, 4), dtype=np.float32 - ) + TensorType(shape=(RangeDim(1, 10), num_features, 2, 3, 4), dtype=np.float32) ] elif dynamic_input == "Height": converter_input_type = [ - TensorType( - shape=(6, num_features, RangeDim(1, 10), 3, 4), dtype=np.float32 - ) + TensorType(shape=(6, num_features, RangeDim(1, 10), 3, 4), dtype=np.float32) ] elif dynamic_input == "Width": converter_input_type = [ - TensorType( - shape=(6, num_features, 2, RangeDim(1, 10), 4), dtype=np.float32 - ) + TensorType(shape=(6, num_features, 2, RangeDim(1, 10), 4), dtype=np.float32) ] elif dynamic_input == "Depth": converter_input_type = [ - TensorType( - shape=(6, num_features, 2, 3, RangeDim(1, 10)), dtype=np.float32 - ) + TensorType(shape=(6, num_features, 2, 3, RangeDim(1, 10)), dtype=np.float32) ] elif dynamic_input == "All": converter_input_type = [ @@ -1302,70 +1295,77 @@ def test_instancenorm_1d(self, compute_unit, backend, num_features): class TestGroupNorm(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, group_features, eps, affine", + "compute_unit, backend, frontend, group_features, eps, affine", itertools.product( - compute_units, backends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] + compute_units, backends, frontends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] ), ) - def test_groupnorm(self, compute_unit, backend, group_features, eps, affine): - model = nn.GroupNorm( - group_features[0], group_features[1], eps=eps, affine=affine - ) + def test_groupnorm(self, compute_unit, backend, frontend, group_features, eps, affine): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch uses native_group_norm") + + model = nn.GroupNorm(group_features[0], group_features[1], eps=eps, affine=affine) self.run_compare_torch( (6, group_features[1], 5, 5), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, group_features, eps, affine", + "compute_unit, backend, frontend, group_features, eps, affine", itertools.product( - compute_units, backends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] + compute_units, backends, frontends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] ), ) def test_groupnorm_rank3_input( - self, compute_unit, backend, group_features, eps, affine + self, compute_unit, backend, frontend, group_features, eps, affine ): - model = nn.GroupNorm( - group_features[0], group_features[1], eps=eps, affine=affine - ) + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch uses native_group_norm") + + model = nn.GroupNorm(group_features[0], group_features[1], eps=eps, affine=affine) self.run_compare_torch( (6, group_features[1], 5), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, group_features, eps, affine", + "compute_unit, 
backend, frontend, group_features, eps, affine", itertools.product( - compute_units, backends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] + compute_units, backends, frontends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] ), ) def test_groupnorm_rank2_input( - self, compute_unit, backend, group_features, eps, affine + self, compute_unit, backend, frontend, group_features, eps, affine ): - model = nn.GroupNorm( - group_features[0], group_features[1], eps=eps, affine=affine - ) + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch uses native_group_norm") + + model = nn.GroupNorm(group_features[0], group_features[1], eps=eps, affine=affine) self.run_compare_torch( (4, group_features[1]), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, group_features, eps, affine", + "compute_unit, backend, frontend, group_features, eps, affine", itertools.product( - compute_units, backends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] + compute_units, backends, frontends, [(16, 32), (1, 1)], [0.1, 1e-05], [True, False] ), ) - def test_groupnorm_dynamic(self, compute_unit, backend, group_features, eps, affine): - model = nn.GroupNorm( - group_features[0], group_features[1], eps=eps, affine=affine - ) + def test_groupnorm_dynamic(self, compute_unit, backend, frontend, group_features, eps, affine): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch uses native_group_norm") + + model = nn.GroupNorm(group_features[0], group_features[1], eps=eps, affine=affine) dim_upper_bound = 30 if backend[0] == "mlprogram" else -1 converter_input_type = [ TensorType( @@ -1381,6 +1381,7 @@ def test_groupnorm_dynamic(self, compute_unit, backend, group_features, eps, aff self.run_compare_torch( (6, group_features[1], 10, 10), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, converter_input_type=converter_input_type, @@ -1389,13 +1390,10 @@ def test_groupnorm_dynamic(self, compute_unit, backend, group_features, eps, aff class TestLinear(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", - itertools.product( - compute_units, - backends, - ), + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, frontends), ) - def test_linear_fp16(self, compute_unit, backend): + def test_linear_fp16(self, compute_unit, backend, frontend): class Model(nn.Module): def __init__(self): super().__init__() @@ -1408,6 +1406,7 @@ def forward(self, x): self.run_compare_torch( torch.randn(4, 4, dtype=torch.float16), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -1415,65 +1414,75 @@ def forward(self, x): ) @pytest.mark.parametrize( - "compute_unit, backend, in_features, out_features, bias", + "compute_unit, backend, frontend, in_features, out_features, bias", itertools.product( compute_units, backends, + frontends, [5], [10], [True, False], ), ) def test_linear_rank1_input( - self, compute_unit, backend, in_features, out_features, bias + self, compute_unit, backend, frontend, in_features, out_features, bias ): model = nn.Linear(in_features, out_features, bias=bias) self.run_compare_torch( (in_features,), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, in_features, out_features, bias", - itertools.product(compute_units, backends, [10, 25], [3, 6], [True, False]), + "compute_unit, backend, frontend, in_features, out_features, bias", + 
itertools.product(compute_units, backends, frontends, [10, 25], [3, 6], [True, False]), ) def test_linear_rank2_input( - self, compute_unit, backend, in_features, out_features, bias + self, compute_unit, backend, frontend, in_features, out_features, bias ): model = nn.Linear(in_features, out_features, bias=bias) self.run_compare_torch( (1, in_features), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, in_features, out_features, bias", - itertools.product(compute_units, backends, [10], [6], [True, False]), + "compute_unit, backend, frontend, in_features, out_features, bias", + itertools.product(compute_units, backends, frontends, [10], [6], [True, False]), ) def test_linear_rank3_input( - self, compute_unit, backend, in_features, out_features, bias + self, compute_unit, backend, frontend, in_features, out_features, bias ): model = nn.Linear(in_features, out_features, bias=bias) self.run_compare_torch( (1, 3, in_features), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, in_features, out_features, bias", - itertools.product(compute_units, backends, [10], [6], [True, False]), + "compute_unit, backend, frontend, in_features, out_features, bias", + itertools.product(compute_units, backends, frontends, [10], [6], [True, False]), ) def test_linear_rank4_input( - self, compute_unit, backend, in_features, out_features, bias + self, compute_unit, backend, frontend, in_features, out_features, bias ): model = nn.Linear(in_features, out_features, bias=bias) - self.run_compare_torch((1, 5, 3, in_features), model, backend=backend) + self.run_compare_torch( + (1, 5, 3, in_features), + model, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + ) class TestConv(TorchBaseTest): @@ -1482,6 +1491,7 @@ class TestConv(TorchBaseTest): [ "compute_unit", "backend", + "frontend", "padding", "stride", "length", @@ -1493,10 +1503,11 @@ class TestConv(TorchBaseTest): ] ), [ - (compute_unit, backend, padding, stride, *param) - for compute_unit, backend, padding, stride, param in itertools.product( + (compute_unit, backend, frontend, padding, stride, *param) + for compute_unit, backend, frontend, padding, stride, param in itertools.product( [ct.ComputeUnit.CPU_ONLY], backends, + frontends, ["same", "valid", 0, 1], [1, 2, 3], [ @@ -1516,6 +1527,7 @@ def test_convolution1d( self, compute_unit, backend, + frontend, padding, stride, length, @@ -1524,7 +1536,6 @@ def test_convolution1d( kernel_size, dilation, bias, - groups=1, ): if padding == "same" and stride != 1: # configuration not supported @@ -1541,6 +1552,7 @@ def test_convolution1d( self.run_compare_torch( (1, in_channels, length), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -1550,6 +1562,7 @@ def test_convolution1d( [ "compute_unit", "backend", + "frontend", "padding", "stride", "height", @@ -1562,10 +1575,11 @@ def test_convolution1d( ] ), [ - (compute_unit, backend, padding, stride, *param) - for compute_unit, backend, padding, stride, param in itertools.product( + (compute_unit, backend, frontend, padding, stride, *param) + for compute_unit, backend, frontend, padding, stride, param in itertools.product( [ct.ComputeUnit.CPU_ONLY], backends, + frontends, ["same", "valid", 1, 0], [1, 2, 3], [ @@ -1585,6 +1599,7 @@ def test_convolution2d( self, compute_unit, backend, + frontend, padding, stride, height, @@ -1594,7 +1609,6 @@ def test_convolution2d( kernel_size, 
dilation, bias, - groups=1, ): if padding == "same" and stride != 1: return @@ -1610,6 +1624,7 @@ def test_convolution2d( self.run_compare_torch( (1, in_channels, height, width), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -1619,6 +1634,7 @@ def test_convolution2d( [ "compute_unit", "backend", + "frontend", "padding", "stride", "depth", @@ -1632,10 +1648,11 @@ def test_convolution2d( ] ), [ - (compute_unit, backend, padding, stride, *param) - for compute_unit, backend, padding, stride, param in itertools.product( + (compute_unit, backend, frontend, padding, stride, *param) + for compute_unit, backend, frontend, padding, stride, param in itertools.product( [ct.ComputeUnit.CPU_ONLY], backends, + frontends, ["same", "valid", 1, 0], [1, 2, 3], [ @@ -1655,6 +1672,7 @@ def test_convolution3d( self, compute_unit, backend, + frontend, padding, stride, depth, @@ -1665,7 +1683,6 @@ def test_convolution3d( kernel_size, dilation, bias, - groups=1, ): if padding == "same" and stride != 1: return @@ -1681,6 +1698,7 @@ def test_convolution3d( self.run_compare_torch( (1, in_channels, depth, height, width), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -1692,6 +1710,7 @@ class TestDynamicConv(TorchBaseTest): [ "compute_unit", "backend", + "frontend", "width", "in_channels", "out_channels", @@ -1701,10 +1720,11 @@ class TestDynamicConv(TorchBaseTest): ] ), [ - (compute_unit, backend, *param) - for compute_unit, backend, param in itertools.product( + (compute_unit, backend, frontend, *param) + for compute_unit, backend, frontend, param in itertools.product( compute_units, backends, + frontends, [ (5, 1, 1, 1, 2, 1), (3, 1, 1, 1, 2, 3), @@ -1722,6 +1742,7 @@ def test_convolution1d( self, compute_unit, backend, + frontend, width, in_channels, out_channels, @@ -1732,9 +1753,7 @@ def test_convolution1d( ): class DynamicConv(nn.Module): def forward(self, input_data, weights): - return nn.functional.conv1d( - input_data, weights, stride=stride, padding=padding - ) + return nn.functional.conv1d(input_data, weights, stride=stride, padding=padding) model = DynamicConv() input_shape = [ @@ -1744,6 +1763,7 @@ def forward(self, input_data, weights): self.run_compare_torch( input_shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -1753,6 +1773,7 @@ def forward(self, input_data, weights): [ "compute_unit", "backend", + "frontend", "height", "width", "in_channels", @@ -1763,10 +1784,11 @@ def forward(self, input_data, weights): ] ), [ - (compute_unit, backend, *param) - for compute_unit, backend, param in itertools.product( + (compute_unit, backend, frontend, *param) + for compute_unit, backend, frontend, param in itertools.product( compute_units, backends, + frontends, [ (5, 3, 1, 1, 1, 2, 0), (3, 3, 1, 1, 1, 2, 1), @@ -1784,6 +1806,7 @@ def test_convolution2d( self, compute_unit, backend, + frontend, height, width, in_channels, @@ -1795,9 +1818,7 @@ def test_convolution2d( ): class DynamicConv(nn.Module): def forward(self, input_data, weights): - return nn.functional.conv2d( - input_data, weights, stride=stride, padding=padding - ) + return nn.functional.conv2d(input_data, weights, stride=stride, padding=padding) model = DynamicConv() @@ -1806,7 +1827,7 @@ def forward(self, input_data, weights): (out_channels, int(in_channels / groups), kernel_size, kernel_size), ] self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, 
frontend=frontend ) @@ -1816,6 +1837,7 @@ class TestConvTranspose(TorchBaseTest): [ "compute_unit", "backend", + "frontend", "width", "in_channels", "out_channels", @@ -1826,10 +1848,11 @@ class TestConvTranspose(TorchBaseTest): ] ), [ - (compute_unit, backend, *param) - for compute_unit, backend, param in itertools.product( + (compute_unit, backend, frontend, *param) + for compute_unit, backend, frontend, param in itertools.product( compute_units, backends, + frontends, [ (3, 1, 1, 1, 2, 0, 1), (3, 1, 1, 1, 2, 1, 3), @@ -1847,6 +1870,7 @@ def test_convolution_transpose1d( self, compute_unit, backend, + frontend, width, in_channels, out_channels, @@ -1868,6 +1892,7 @@ def test_convolution_transpose1d( self.run_compare_torch( (1, in_channels, width), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -1877,6 +1902,7 @@ def test_convolution_transpose1d( [ "compute_unit", "backend", + "frontend", "height", "width", "in_channels", @@ -1888,10 +1914,11 @@ def test_convolution_transpose1d( ] ), [ - (compute_unit, backend, *param) - for compute_unit, backend, param in itertools.product( + (compute_unit, backend, frontend, *param) + for compute_unit, backend, frontend, param in itertools.product( compute_units, backends, + frontends, [ (5, 5, 1, 1, 1, 2, 0, 1), (5, 5, 1, 1, 1, 2, 1, 3), @@ -1909,6 +1936,7 @@ def test_convolution_transpose2d( self, compute_unit, backend, + frontend, height, width, in_channels, @@ -1917,7 +1945,6 @@ def test_convolution_transpose2d( stride, padding, dilation, - groups=1, ): model = nn.ConvTranspose2d( in_channels=in_channels, @@ -1930,15 +1957,17 @@ def test_convolution_transpose2d( self.run_compare_torch( (1, in_channels, height, width), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, dynamic_input", + "compute_unit, backend, frontend, dynamic_input", itertools.product( compute_units, backends, + frontends, [True, False], ), ) @@ -1946,6 +1975,7 @@ def test_convolution_transpose2d_dynamic_input( self, compute_unit, backend, + frontend, dynamic_input, ): in_channels = 5 @@ -1961,6 +1991,7 @@ def test_convolution_transpose2d_dynamic_input( in_width = 512 input_shape = (1, in_channels, in_height, in_width) + converter_input_type = None if dynamic_input: upper_bound = 4096 if backend[0] == "mlprogram" else -1 converter_input_type = [ @@ -1969,26 +2000,21 @@ def test_convolution_transpose2d_dynamic_input( dtype=np.float32, ) ] - self.run_compare_torch( - input_shape, - model, - backend=backend, - compute_unit=compute_unit, - converter_input_type=converter_input_type, - ) - else: - self.run_compare_torch( - input_shape, - model, - backend=backend, - compute_unit=compute_unit, - ) + self.run_compare_torch( + input_shape, + model, + converter_input_type=converter_input_type, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + ) @pytest.mark.parametrize( ",".join( [ "compute_unit", "backend", + "frontend", "height", "width", "in_channels", @@ -2001,10 +2027,11 @@ def test_convolution_transpose2d_dynamic_input( ] ), [ - (compute_unit, backend, *param) - for compute_unit, backend, param in itertools.product( + (compute_unit, backend, frontend, *param) + for compute_unit, backend, frontend, param in itertools.product( compute_units, backends, + frontends, [ (5, 5, 1, 1, 1, 2, 1, 1, 1), (5, 5, 1, 1, 1, 2, 2, 3, 2), @@ -2022,6 +2049,7 @@ def test_convolution_transpose2d_output_padding( self, compute_unit, backend, + frontend, height, width, in_channels, @@ 
-2031,7 +2059,6 @@ def test_convolution_transpose2d_output_padding( padding, dilation, output_padding, - groups=1, ): # Output padding must be less than either stride or dilation @@ -2053,13 +2080,20 @@ def test_convolution_transpose2d_output_padding( dilation=dilation, output_padding=output_padding, ) - self.run_compare_torch((1, in_channels, height, width), model, backend=backend) + self.run_compare_torch( + (1, in_channels, height, width), + model, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + ) @pytest.mark.parametrize( ",".join( [ "compute_unit", "backend", + "frontend", "depth", "height", "width", @@ -2072,10 +2106,11 @@ def test_convolution_transpose2d_output_padding( ] ), [ - (compute_unit, backend, *param) - for compute_unit, backend, param in itertools.product( + (compute_unit, backend, frontend, *param) + for compute_unit, backend, frontend, param in itertools.product( compute_units, backends, + frontends, [ (3, 5, 5, 1, 1, 1, 2, 0, 1), (3, 5, 5, 1, 1, 1, 2, 1, 3), @@ -2086,42 +2121,13 @@ def test_convolution_transpose2d_output_padding( (4, 6, 5, 3, 3, 1, 3, 1, 3), ], ) - ] - + [ - pytest.param( - ct.ComputeUnit.CPU_ONLY, - "neualnetwork", - 5, - 5, - 1, - 1, - 3, - 4, - 1, - 1, - 2, - marks=pytest.mark.xfail, - ), - pytest.param( - ct.ComputeUnit.CPU_ONLY, - "neualnetwork", - 5, - 5, - 1, - 1, - 3, - 2, - 1, - 3, - 2, - marks=pytest.mark.xfail, - ), ], ) def test_convolution_transpose3d( self, compute_unit, backend, + frontend, depth, height, width, @@ -2143,6 +2149,7 @@ def test_convolution_transpose3d( self.run_compare_torch( (1, in_channels, depth, height, width), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -2589,9 +2596,7 @@ def forward(self, x): # Value of y is Nondeterministic, so return length return torch.Tensor([len(y)]) - self.run_compare_torch( - shape, TestModel(), backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(shape, TestModel(), backend=backend, compute_unit=compute_unit) @pytest.mark.parametrize( "compute_unit, backend, shape", @@ -3018,7 +3023,7 @@ def test_max_pool3d( padding, ceil_mode, ): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail("TODO (rdar://115846125): handle multi-output op max_pool3d_with_indices") if padding > kernel_size / 2: @@ -3119,10 +3124,11 @@ def forward(self, x, y): class TestAMaxAMin(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_shapes, mode, reduce_dim, keepdim", + "compute_unit, backend, frontend, input_shapes, mode, reduce_dim, keepdim", itertools.product( compute_units, backends, + frontends, [ [(2, 5, 7, 3)], [(3, 2, 9)], @@ -3133,13 +3139,15 @@ class TestAMaxAMin(TorchBaseTest): [True, False], ), ) - def test_minimum_maximum(self, compute_unit, backend, input_shapes, mode, reduce_dim, keepdim): + def test_minimum_maximum( + self, compute_unit, backend, frontend, input_shapes, mode, reduce_dim, keepdim + ): class TestModel(torch.nn.Module): def forward(self, input): if type(reduce_dim) == int: reduce_dim_clamped = min(input.dim() - 1, reduce_dim) else: - reduce_dim_clamped = reduce_dim[:input.dim()] + reduce_dim_clamped = reduce_dim[: input.dim()] if mode == "minimum": return torch.amin(input, reduce_dim_clamped, keepdim) elif mode == "maximum": @@ -3149,16 +3157,16 @@ def forward(self, input): model = TestModel() self.run_compare_torch( - input_shapes, model, backend=backend, compute_unit=compute_unit + input_shapes, model, compute_unit=compute_unit, backend=backend, 
frontend=frontend ) class TestPoolSymbolicInput(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", - itertools.product(compute_units, backends), + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, frontends), ) - def test_max_pool(self, compute_unit, backend): + def test_max_pool(self, compute_unit, backend, frontend): model = nn.MaxPool2d( kernel_size=1, stride=2, @@ -3177,16 +3185,17 @@ def test_max_pool(self, compute_unit, backend): self.run_compare_torch( input_shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, converter_input_type=converter_input_type, ) @pytest.mark.parametrize( - "compute_unit, backend", - itertools.product(compute_units, backends), + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, frontends), ) - def test_avg_pool(self, compute_unit, backend): + def test_avg_pool(self, compute_unit, backend, frontend): model = nn.AvgPool2d( kernel_size=2, stride=2, @@ -3205,6 +3214,7 @@ def test_avg_pool(self, compute_unit, backend): self.run_compare_torch( input_shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, converter_input_type=converter_input_type, @@ -3538,17 +3548,15 @@ def forward(self, x): # Check GitHub Issue #810, assume num_layers == 2 and bidirectional == True class TestStackedBLSTM(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional", + "compute_unit, backend, input_size, hidden_size, bias, batch_first, dropout", itertools.product( compute_units, backends, [7], [5], - [2], [True, False], [True, False], [0.3], - [True], ), ) def test_lstm( @@ -3557,11 +3565,9 @@ def test_lstm( backend, input_size, hidden_size, - num_layers, bias, batch_first, dropout, - bidirectional, ): model = nn.Sequential( nn.LSTM( @@ -3609,8 +3615,10 @@ def test_lstm( class TestConcat(TorchBaseTest): - @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) - def test_cat_basic(self, compute_unit, backend): + @pytest.mark.parametrize( + "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends) + ) + def test_cat_basic(self, compute_unit, backend, frontend): class TestNet(nn.Module): def forward(self, x): x = torch.cat((x, x), axis=1) @@ -3620,12 +3628,15 @@ def forward(self, x): self.run_compare_torch( (1, 2, 3), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) - @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) - def test_cat_with_empty(self, compute_unit, backend): + @pytest.mark.parametrize( + "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends) + ) + def test_cat_with_empty(self, compute_unit, backend, frontend): class TestNet(nn.Module): def forward(self, x): return torch.cat((x, torch.tensor([])), axis=1) @@ -3634,14 +3645,18 @@ def forward(self, x): self.run_compare_torch( (1, 2, 3), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend", itertools.product(compute_units, backends) + "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends) ) - def test_cat_input_types_promotion(self, compute_unit, backend): + def test_cat_input_types_promotion(self, compute_unit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("executorch does not allow mixed dtypes") + class 
TestNet(nn.Module): def forward(self, x, y): return torch.cat((x, y), axis=1) @@ -3651,6 +3666,7 @@ def forward(self, x, y): self.run_compare_torch( [input_data_x, input_data_y], TestNet(), + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -3660,9 +3676,9 @@ def forward(self, x, y): # has one item. NN throws an error for this case, hence why we have to # run through the full conversion process to test it. @pytest.mark.parametrize( - "compute_unit, backend", itertools.product(compute_units, backends) + "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends) ) - def test_cat_single_input(self, compute_unit, backend): + def test_cat_single_input(self, compute_unit, backend, frontend): class TestNet(nn.Module): def forward(self, x): x = torch.cat((x,), axis=1) @@ -3672,12 +3688,15 @@ def forward(self, x): self.run_compare_torch( (1, 3, 16, 16), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) - @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) - def test_cat_const_fold(self, compute_unit, backend): + @pytest.mark.parametrize( + "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends) + ) + def test_cat_const_fold(self, compute_unit, backend, frontend): class TestNet(nn.Module): def forward(self, x): x = torch.tensor([[[1, 2], [2, 3], [3, 4]]]) @@ -3687,6 +3706,7 @@ def forward(self, x): mlmodel = self.run_compare_torch( (1, 2, 3), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -3694,23 +3714,22 @@ def forward(self, x): # The `listconstruct` is folded into a single const. assert len(prog.find_ops(op_type="const")) == 1 - with patch.object(Var, '_is_nonreplaceable_var') as mocked_is_nonreplaceable_var: + with patch.object(Var, "_is_nonreplaceable_var") as mocked_is_nonreplaceable_var: # Mock that the input with shape [1, 3, 2] const is non-replaceable. mocked_is_nonreplaceable_var.side_effect = ( lambda var: var.op and var.op.op_type == "const" and var.rank == 3 ) mlmodel = self.run_compare_torch( - [(1, 2, 3)], - model, - backend=backend, - compute_unit=compute_unit + [(1, 2, 3)], model, frontend=frontend, backend=backend, compute_unit=compute_unit ) prog = mlmodel[1]._mil_program # The `listconstruct` is not folded so there are 3 const ops. 
assert len(prog.find_ops(op_type="const")) == 3 - @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) - def test_concat_alias(self, compute_unit, backend): + @pytest.mark.parametrize( + "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends) + ) + def test_concat_alias(self, compute_unit, backend, frontend): class Outer(torch.nn.Module): def __init__(self, net): super(Outer, self).__init__() @@ -3730,6 +3749,7 @@ def forward(self, x): self.run_compare_torch( (1, 3, 16, 16), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -3737,14 +3757,15 @@ def forward(self, x): class TestTile(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, dims", + "compute_unit, backend, frontend, dims", itertools.product( compute_units, backends, + frontends, [(1, 2, 4), (3, 2), (2,)], ), ) - def test_tile(self, compute_unit, backend, dims): + def test_tile(self, compute_unit, backend, frontend, dims): class TestModel(nn.Module): def forward(self, x): return torch.tile(x, dims) @@ -3752,6 +3773,7 @@ def forward(self, x): self.run_compare_torch( (2, 3, 5), TestModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -3759,14 +3781,15 @@ def forward(self, x): class TestBitwiseNot(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_type", + "compute_unit, backend, frontend, input_type", itertools.product( compute_units, backends, + frontends, ["int", "bool"], ), ) - def test_bitwise_not(self, compute_unit, backend, input_type): + def test_bitwise_not(self, compute_unit, backend, frontend, input_type): class TestNet(nn.Module): def forward(self, x): return torch.bitwise_not(x) @@ -3779,6 +3802,7 @@ def forward(self, x): self.run_compare_torch( torch_in, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -3799,14 +3823,15 @@ def _get_inputs(self, input_types): return (x, y) @pytest.mark.parametrize( - "compute_unit, backend, input_types", + "compute_unit, backend, frontend, input_types", itertools.product( compute_units, backends, + frontends, [("int", "int"), ("int", "bool"), ("bool", "int"), ("bool", "bool")], ), ) - def test_mul_int_or_bool(self, compute_unit, backend, input_types): + def test_mul_int_or_bool(self, compute_unit, backend, frontend, input_types): class TestMulWithBool(nn.Module): def forward(self, x, y): return x * y @@ -3816,20 +3841,22 @@ def forward(self, x, y): self.run_compare_torch( (x, y), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, ) @pytest.mark.parametrize( - "compute_unit, backend, input_types", + "compute_unit, backend, frontend, input_types", itertools.product( compute_units, backends, + frontends, [("int", "int"), ("int", "bool"), ("bool", "int"), ("bool", "bool")], ), ) - def test_add_int_or_bool(self, compute_unit, backend, input_types): + def test_add_int_or_bool(self, compute_unit, backend, frontend, input_types): class TestAddWithBool(nn.Module): def forward(self, x, y): return x + y @@ -3839,21 +3866,26 @@ def forward(self, x, y): self.run_compare_torch( (x, y), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, ) @pytest.mark.parametrize( - "compute_unit, backend, x_complex, y_complex", + "compute_unit, backend, frontend, x_complex, y_complex", itertools.product( compute_units, backends, + frontends, (True, False), (True, False), ), ) - def test_add_complex(self, compute_unit, 
backend, x_complex, y_complex): + def test_add_complex(self, compute_unit, backend, frontend, x_complex, y_complex): + if frontend == TorchFrontend.EXECUTORCH and (x_complex or y_complex): + pytest.skip("Complex is not aten canonical") + class TestAddComplexModel(nn.Module): def forward(self, x, y): if x_complex: @@ -3867,6 +3899,7 @@ def forward(self, x, y): TestAddComplexModel(), compute_unit=compute_unit, backend=backend, + frontend=frontend, ) @@ -3987,10 +4020,11 @@ def forward(self, x): class TestDim(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape", + "compute_unit, backend, frontend, shape", itertools.product( compute_units, backends, + frontends, [ (1,), (2, 3), @@ -3998,13 +4032,13 @@ class TestDim(TorchBaseTest): ], ), ) - def test_dim(self, compute_unit, backend, shape): + def test_dim(self, compute_unit, backend, frontend, shape): class DimModel(nn.Module): def forward(self, x): return torch.tensor([x.dim()]) self.run_compare_torch( - shape, DimModel().eval(), backend=backend, compute_unit=compute_unit + shape, DimModel().eval(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @@ -4144,9 +4178,7 @@ def forward(self, x): input_shape = (3, 3) if eye_type == "single" else (2, 3) model = Model().eval() - self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(input_shape, model, backend=backend, compute_unit=compute_unit) class TestOnes(TorchBaseTest): @@ -4202,39 +4234,50 @@ def forward(self, x): class TestRandint(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape, low, high", + "compute_unit, backend, frontend, shape, low, high", itertools.product( compute_units, backends, + frontends, [(1,), (2, 3)], [-1, 2], [3, 5], ), ) - def test_randint(self, compute_unit, backend, shape, low, high): + def test_randint(self, compute_unit, backend, frontend, shape, low, high): class TestModel(nn.Module): def forward(self, x): y = torch.randint(low, high, x.shape) - return torch.Tensor([len(y)]) + if frontend == TorchFrontend.TORCHSCRIPT: + return torch.Tensor([len(y)]) + else: + return torch.tensor(y.shape) self.run_compare_torch( - shape, TestModel(), backend=backend, compute_unit=compute_unit - ) - + shape, + TestModel(), + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + ) + + @pytest.mark.parametrize("frontend", frontends) + def test_tuple_input(self, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.randint.low is not Aten Canonical") - def test_tuple_input(self): class TestModel(nn.Module): def forward(self, x): return torch.randint(0, 3, (10,)) model = TestModel().eval() x = torch.randn((1, 3, 256, 256)) - traced_model = torch.jit.trace(model, example_inputs=x) - ct.convert(traced_model, inputs=[ct.TensorType(shape=x.shape)]) + torch_model = export_torch_model_to_frontend(model, (x,), frontend) + inputs = [ct.TensorType(shape=x.shape)] if frontend == TorchFrontend.TORCHSCRIPT else None + ct.convert(torch_model, inputs=inputs) class TestRand(TorchBaseTest): - @pytest.mark.parametrize( "compute_unit, backend, shape, dtype", itertools.product( @@ -4249,45 +4292,51 @@ class TestModel(nn.Module): def forward(self, x): y = torch.rand(x.shape, dtype=dtype) # can't compare directly (this is random) - return torch.stack([ - torch.ones_like(y, dtype=torch.float32), - (y >= 0).to(torch.float32), - (y < 1).to(torch.float32), - ]) + return torch.stack( + [ + torch.ones_like(y, dtype=torch.float32), + (y >= 
0).to(torch.float32), + (y < 1).to(torch.float32), + ] + ) - self.run_compare_torch( - shape, TestModel(), backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(shape, TestModel(), backend=backend, compute_unit=compute_unit) class TestRandn(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape", + "compute_unit, backend, frontend, shape", itertools.product( compute_units, backends, + frontends, [(1,), (2, 3)], ), ) - def test_randn(self, compute_unit, backend, shape): + def test_randn(self, compute_unit, backend, frontend, shape): class TestModel(nn.Module): def forward(self, x): y = torch.randn(*x.shape) - return torch.Tensor([len(y)]) + if frontend == TorchFrontend.TORCHSCRIPT: + return torch.Tensor([len(y)]) + else: + return torch.tensor(y.shape) self.run_compare_torch( - shape, TestModel(), backend=backend, compute_unit=compute_unit + shape, + TestModel(), + compute_unit=compute_unit, + backend=backend, + frontend=frontend, ) - @pytest.mark.parametrize( - "dtype", - [torch.complex64, torch.cfloat, torch.complex128, torch.cdouble] + "dtype", [torch.complex64, torch.cfloat, torch.complex128, torch.cdouble] ) def test_invalid_complex_dtype(self, dtype): class TestModel(torch.nn.Module): def forward(self, x): - return torch.randn((5, 4), dtype=torch.cfloat) + return torch.randn((5, 4), dtype=dtype) with pytest.raises(AssertionError, match="complex number dtype"): self.run_compare_torch((5, 4), TestModel()) @@ -4295,24 +4344,27 @@ def forward(self, x): class TestRandnLike(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape", + "compute_unit, backend, frontend, shape", itertools.product( compute_units, backends, + frontends, [(1,), (2, 3)], ), ) - def test_randn_like(self, compute_unit, backend, shape): + def test_randn_like(self, compute_unit, backend, frontend, shape): class TestModel(nn.Module): def forward(self, x): y = torch.randn_like(torch.randn(shape)) - return torch.Tensor([len(y)]) + if frontend == TorchFrontend.TORCHSCRIPT: + return torch.Tensor([len(y)]) + else: + return torch.tensor(y.shape) self.run_compare_torch( - shape, TestModel(), backend=backend, compute_unit=compute_unit + shape, TestModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) - @pytest.mark.parametrize( "dtype", [torch.complex64, torch.cfloat, torch.complex128, torch.cdouble] @@ -4320,7 +4372,7 @@ def forward(self, x): def test_invalid_complex_dtype(self, dtype): class TestModel(torch.nn.Module): def forward(self, x): - return torch.randn_like(x, dtype=torch.cfloat) + return torch.randn_like(x, dtype=dtype) with pytest.raises(AssertionError, match="complex number dtype"): self.run_compare_torch((5, 4), TestModel()) @@ -4332,11 +4384,6 @@ class TestTypeAs(TorchBaseTest): itertools.product(compute_units, backends, ["int32", "float32", "bool"]), ) def test_type_as(self, compute_unit, backend, type): - if backend == ("mlprogram", "fp16") and type == "bool": - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) - class TestNet(nn.Module): def forward(self, x, y): return x.type_as(y) @@ -4420,9 +4467,7 @@ def test_min_max_with_no_arguments(self, compute_unit, backend, input_shape, mod @pytest.mark.parametrize( "compute_unit, backend, input_shape, dim, mode", - itertools.product( - compute_units, backends, [(2, 2), (1, 1)], [0, 1], ["min", "max"] - ), + itertools.product(compute_units, backends, [(2, 2), (1, 1)], [0, 1], ["min", "max"]), ) def test_min_max_no_keepdim(self, compute_unit, 
backend, input_shape, dim, mode): input_data = torch.rand(input_shape) @@ -4444,55 +4489,52 @@ def test_min_max_no_keepdim(self, compute_unit, backend, input_shape, dim, mode) ) def test_min_max_two_tensors(self, compute_unit, backend, input_shape, mode): model = self.TestModel(mode) - self.run_compare_torch( - [input_shape] * 2, model, backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch([input_shape] * 2, model, backend=backend, compute_unit=compute_unit) class TestLayerNorm(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_shape, eps", + "compute_unit, backend, frontend, input_shape, eps", itertools.product( [ct.ComputeUnit.CPU_ONLY], backends, + frontends, [(1, 3, 15, 15), (1, 1, 1, 1)], [1e-5, 1e-7], ), ) - def test_layer_norm(self, compute_unit, backend, input_shape, eps): + def test_layer_norm(self, compute_unit, backend, frontend, input_shape, eps): model = nn.LayerNorm(input_shape, eps=eps) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestPixelShuffle(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, batch_size, CHW, r", + "compute_unit, backend, frontend, batch_size, CHW, r", itertools.product( - compute_units, backends, [1, 3], [(1, 4, 4), (3, 2, 3)], [2, 4] + compute_units, backends, frontends, [1, 3], [(1, 4, 4), (3, 2, 3)], [2, 4] ), ) - def test_pixel_shuffle(self, compute_unit, backend, batch_size, CHW, r): + def test_pixel_shuffle(self, compute_unit, backend, frontend, batch_size, CHW, r): C, H, W = CHW input_shape = (batch_size, C * r * r, H, W) model = nn.PixelShuffle(upscale_factor=r) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) -@pytest.mark.skipif( - _macos_version() < (13, 0), reason="New functionality in macOS13/iOS16" -) +@pytest.mark.skipif(_macos_version() < (13, 0), reason="New functionality in macOS13/iOS16") class TestPixelUnshuffle(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, batch_size, CHW, r", + "compute_unit, backend, frontend, batch_size, CHW, r", itertools.product( - compute_units, backends, [1, 3], [(1, 4, 4), (3, 2, 3)], [2, 4] + compute_units, backends, frontends, [1, 3], [(1, 4, 4), (3, 2, 3)], [2, 4] ), ) - def test_pixel_shuffle(self, compute_unit, backend, batch_size, CHW, r): + def test_pixel_shuffle(self, compute_unit, backend, frontend, batch_size, CHW, r): if backend[0] == "neuralnetwork": pytest.skip("pixel_unshuffle only supported in mlprogram backend.") @@ -4502,6 +4544,7 @@ def test_pixel_shuffle(self, compute_unit, backend, batch_size, CHW, r): self.run_compare_torch( input_shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, minimum_deployment_target=ct.target.iOS16, @@ -4510,10 +4553,11 @@ def test_pixel_shuffle(self, compute_unit, backend, batch_size, CHW, r): class TestExpand(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [ [(2, 1), (2, 2)], [(3, 1), (-1, 4)], @@ -4523,7 +4567,7 @@ class TestExpand(TorchBaseTest): ], ), ) - def test_expand(self, compute_unit, backend, shapes): + def test_expand(self, compute_unit, backend, frontend, shapes): input_shape, output_shape = shapes class TestModel(torch.nn.Module): @@ -4533,18 +4577,21 @@ 
def forward(self, x): model = TestModel() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, minimum_deployment_target", + "compute_unit, backend, frontend, minimum_deployment_target", itertools.product( compute_units, backends, + frontends, [None, ct.target.iOS17], ), ) - def test_expand_dynamic_shape0(self, compute_unit, backend, minimum_deployment_target): + def test_expand_dynamic_shape0( + self, compute_unit, backend, frontend, minimum_deployment_target + ): class TestModel(nn.Module): def forward(self, x): return x.expand(x.shape[1], x.shape[1]) @@ -4558,19 +4605,21 @@ def forward(self, x): shape=[1, ct.RangeDim(upper_bound=20 if backend[0] == "mlprogram" else -1)] ) ], + frontend=frontend, backend=backend, compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_expand_dynamic_shape1(self, compute_unit, backend): + def test_expand_dynamic_shape1(self, compute_unit, backend, frontend): class TestModel(nn.Module): def forward(self, x): return x.expand(x.shape[0], 1, x.shape[-1], x.shape[-1]) @@ -4588,18 +4637,20 @@ def forward(self, x): ] ) ], + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_expand_dynamic_shape2(self, compute_unit, backend): + def test_expand_dynamic_shape2(self, compute_unit, backend, frontend): class TestModel(nn.Module): def forward(self, x): return x.expand(x.shape[-1], 1, x.shape[-1], x.shape[-1]) @@ -4610,18 +4661,20 @@ def forward(self, x): TestModel(), input_as_shape=False, converter_input_type=[TensorType(shape=[1, ct.RangeDim(upper_bound=upper_bound)])], + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_expand_dynamic_shape3(self, compute_unit, backend): + def test_expand_dynamic_shape3(self, compute_unit, backend, frontend): class TestModel(nn.Module): def forward(self, x): return x.expand(x.shape[0], 10) @@ -4639,18 +4692,27 @@ def forward(self, x): ] ) ], + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_expand_dynamic_shape_from_another_input(self, compute_unit, backend): + def test_expand_dynamic_shape_from_another_input(self, compute_unit, backend, frontend): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip( + "torch._dynamo.exc.UserError: Tried to use data-dependent value in the subsequent " + "computation. This can happen when we encounter unbounded dynamic value that is " + "unknown during tracing time." 
+ ) + class TestModel(nn.Module): def forward(self, x, y): return x.expand(int(y[0]), int(y[1])) @@ -4665,15 +4727,17 @@ def forward(self, x, y): ), TensorType(shape=(2,)), ], + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @pytest.mark.parametrize( - "compute_unit, backend, input_shapes", + "compute_unit, backend, frontend, input_shapes", itertools.product( compute_units, backends, + frontends, [ [(2, 1), (2, 2)], [(3, 1), (3, 4)], @@ -4682,7 +4746,7 @@ def forward(self, x, y): ], ), ) - def test_expand_as(self, compute_unit, backend, input_shapes): + def test_expand_as(self, compute_unit, backend, frontend, input_shapes): class TestModel(torch.nn.Module): def forward(self, x, y): return x.expand_as(y) @@ -4690,31 +4754,119 @@ def forward(self, x, y): model = TestModel() self.run_compare_torch( - input_shapes, model, backend=backend, compute_unit=compute_unit + input_shapes, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestExpandDims(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank_and_axis", + "compute_unit, backend, frontend, rank_and_axis", itertools.product( compute_units, backends, - [ - (rank, axis) - for rank in range(1, 5) - for axis in range(-rank - 1, rank + 1) - ], + frontends, + [(rank, axis) for rank in range(1, 5) for axis in range(-rank - 1, rank + 1)], ), ) - def test_unsqueeze(self, compute_unit, backend, rank_and_axis): + def test_unsqueeze(self, compute_unit, backend, frontend, rank_and_axis): rank, axis = rank_and_axis input_shape = tuple(np.random.randint(low=2, high=10, size=rank)) model = ModuleWrapper(function=torch.unsqueeze, kwargs={"dim": axis}) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend + ) + + +class TestAtLeastND(TorchBaseTest): + @staticmethod + def _generate_input_shape(input_rank): + if input_rank == 0: + # Core ML does not support scalar input, so we use rank-1 size-1 tensor then squeeze + input_shape = (1,) + else: + input_shape = np.random.randint(2, 5, input_rank) + return input_shape + + @pytest.mark.parametrize( + "compute_unit, backend, frontend, rank, input_rank", + itertools.product( + compute_units, + backends, + frontends, + (1, 2, 3), + (0, 1, 2, 3, 4, 5), + ), + ) + def test_atleast_nd(self, compute_unit, backend, frontend, rank, input_rank): + if backend[0] == "neuralnetwork" and rank in (2, 3) and input_rank == 0: + pytest.xfail("rdar://134723147 nn backend additionally expands a dim") + + class Model(torch.nn.Module): + def forward(self, x): + # Core ML does not support scalar input, so we use rank-1 size-1 tensor then squeeze + if input_rank == 0: + x = torch.squeeze(x) + if rank == 1: + result = torch.atleast_1d(x) + elif rank == 2: + result = torch.atleast_2d(x) + else: + assert rank == 3 + result = torch.atleast_3d(x) + return result + + input_shape = self._generate_input_shape(input_rank) + model = Model() + + self.run_compare_torch( + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) + @pytest.mark.parametrize( + "compute_unit, backend, frontend, rank, input_rank", + itertools.product( + compute_units, + backends, + frontends, + (1, 2, 3), + (0, 1, 2, 3, 4, 5), + ), + ) + def test_atleast_nd_sequence(self, compute_unit, backend, frontend, rank, input_rank): + if backend[0] == "neuralnetwork" and rank in (2, 3) and input_rank == 0: + pytest.xfail("rdar://134723147 nn backend additionally expands a 
dim") + + class Model(torch.nn.Module): + def forward(self, x, y): + # Core ML does not support scalar input, so we use rank-1 size-1 tensor then squeeze + if input_rank == 0: + x = torch.squeeze(x) + y = torch.squeeze(y) + + # Lowering "tuple input as output" pymil program gives wrong output, + # so insert add ops to avoid "input as output" + # TODO (rdar://134722912) Fix the "tuple input as output" pymil program lowering + x = x + 1.0 + y = y + 2.0 + + if rank == 1: + result = torch.atleast_1d((x, y)) + elif rank == 2: + result = torch.atleast_2d((x, y)) + else: + assert rank == 3 + result = torch.atleast_3d((x, y)) + return result + + input_shape = [ + self._generate_input_shape(input_rank), + self._generate_input_shape(input_rank), + ] + model = Model() + + self.run_compare_torch( + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend + ) class TestLinspace(TorchBaseTest): @pytest.mark.parametrize( @@ -4735,13 +4887,9 @@ def forward(self, x): return torch.linspace(start, end, steps) model = Model() - self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(input_shape, model, backend=backend, compute_unit=compute_unit) - @pytest.mark.parametrize( - "compute_unit, backend", itertools.product(compute_units, backends) - ) + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) def test_linspace_static_large(self, compute_unit, backend): input_shape = tuple([1]) @@ -4750,9 +4898,7 @@ def forward(self, x): return torch.linspace(1, 2_000_000, 2_000_000) model = Model() - self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(input_shape, model, backend=backend, compute_unit=compute_unit) @pytest.mark.parametrize( "compute_unit, backend, start_end, steps", @@ -4794,29 +4940,20 @@ def forward(self, x): model = Model() mlmodel = self.run_compare_torch( - [(1, 2, 3)], - model, - backend=backend, - compute_unit=compute_unit + [(1, 2, 3)], model, backend=backend, compute_unit=compute_unit ) prog = mlmodel[1]._mil_program # The linspace op is folded to const, so there is no range_1d op. assert len(prog.find_ops(op_type="const")) == 1 assert len(prog.find_ops(op_type="range_1d")) == 0 - with patch.object(Var, '_is_nonreplaceable_var') as mocked_is_nonreplaceable_var: + with patch.object(Var, "_is_nonreplaceable_var") as mocked_is_nonreplaceable_var: # Mock that the first param to linspace is non-replaceable. mocked_is_nonreplaceable_var.side_effect = ( - lambda var: var.op - and var.op.op_type == "const" - and var.rank == 0 - and var.val == 0 + lambda var: var.op and var.op.op_type == "const" and var.rank == 0 and var.val == 0 ) mlmodel = self.run_compare_torch( - [(1, 2, 3)], - model, - backend=backend, - compute_unit=compute_unit + [(1, 2, 3)], model, backend=backend, compute_unit=compute_unit ) prog = mlmodel[1]._mil_program # The linspace op is not folded to const, but translated to range_1d instead. 
@@ -4825,10 +4962,11 @@ def forward(self, x): class TestArange(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, start_end_step", + "compute_unit, backend, frontend, start_end_step", itertools.product( compute_units, backends, + frontends, [ (-0.1, -0.7, -0.07), (3, 10, 0.3), @@ -4838,16 +4976,11 @@ class TestArange(TorchBaseTest): ], ), ) - def test_arange_static(self, compute_unit, backend, start_end_step): + def test_arange_static(self, compute_unit, backend, frontend, start_end_step): if start_end_step == (1, 10, 1e-6): - pytest.xfail( - "rdar://88998831 (range_1d has numerical issue when the step is small)" - ) - input_shape = tuple( - [ - 1, - ] - ) + pytest.xfail("rdar://88998831 (range_1d has numerical issue when the step is small)") + + input_shape = (1,) start, end, step = start_end_step class Model(nn.Module): @@ -4856,14 +4989,15 @@ def forward(self, x): model = Model() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, start_end_step", + "compute_unit, backend, frontend, start_end_step", itertools.product( compute_units, backends, + frontends, [ (-0.1, -0.7, -0.07), (3, 10, 0.3), @@ -4872,7 +5006,14 @@ def forward(self, x): ], ), ) - def test_arange_dynamic(self, compute_unit, backend, start_end_step): + def test_arange_dynamic(self, compute_unit, backend, frontend, start_end_step): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip( + "torch._dynamo.exc.UserError: Tried to use data-dependent value in the subsequent " + "computation. This can happen when we encounter unbounded dynamic value that is " + "unknown during tracing time." + ) + start, end, step = start_end_step class Model(nn.Module): @@ -4884,32 +5025,55 @@ def forward(self, x): self.run_compare_torch( inputs, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, ) + @pytest.mark.parametrize( + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, frontends), + ) + def test_arange_without_start(self, compute_unit, backend, frontend): + class Model(nn.Module): + def forward(self, x): + return torch.arange(10) + + model = Model() + self.run_compare_torch( + (1,), model, compute_unit=compute_unit, backend=backend, frontend=frontend + ) + class TestEinsum(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, equation, reverse_input_order, dynamic", + "compute_unit, backend, frontend, equation, reverse_input_order, dynamic", itertools.product( compute_units, backends, + frontends, einsum_equations, [False, True], [False, True], ), ) - def test_binary_einsum(self, compute_unit, backend, equation, reverse_input_order, dynamic): + def test_binary_einsum( + self, compute_unit, backend, frontend, equation, reverse_input_order, dynamic + ): if dynamic and backend[0] == "mlprogram" and ct.utils._macos_version() > (14, 2): pytest.xfail("rdar://120386990 (Einsum Model Failed)") + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch einsum decomposition issue") + class TestBinaryEinsum(nn.Module): def forward(self, x, y): return torch.einsum(equation, x, y) input_shapes, converter_input_type = gen_input_shapes_einsum(equation, dynamic, backend) + if frontend != TorchFrontend.TORCHSCRIPT: + converter_input_type = None if reverse_input_order: input_output_strings = equation.split("->") @@ -4923,10 +5087,11 @@ def forward(self, x, y): res = 
self.run_compare_torch( input_shapes, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=True, - converter_input_type=converter_input_type + converter_input_type=converter_input_type, ) # Verify the pattern of the hardcode einsum cases @@ -4947,19 +5112,20 @@ def forward(self, x, y): assert "shape" not in ops_in_prog @pytest.mark.parametrize( - "compute_unit, backend, equation, dynamic", + "compute_unit, backend, frontend, equation, dynamic", itertools.product( compute_units, backends, + frontends, ["ab->ba", "aa->a", "ab->b", "iijk->ji"], [False, True], ), ) - def test_unary_einsum(self, compute_unit, backend, equation, dynamic): - if backend == ("mlprogram", "fp16") and equation == "iijk->ji" and dynamic: - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) + def test_unary_einsum(self, compute_unit, backend, frontend, equation, dynamic): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch einsum decomposition issue") + if platform.machine() == "x86_64" and dynamic and equation == "iijk->ji": + pytest.xfail("rdar://135843153 ([Bug] Models failed on x86_64 platform)") class TestUnaryEinsum(nn.Module): def forward(self, x): @@ -4970,22 +5136,27 @@ def forward(self, x): self.run_compare_torch( input_shapes, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=True, - converter_input_type=converter_input_type + converter_input_type=converter_input_type, ) @pytest.mark.parametrize( - "compute_unit, backend, equation, dynamic", + "compute_unit, backend, frontend, equation, dynamic", itertools.product( compute_units, backends, + frontends, ["ab,bc,cd->ba", "abb,abc,a->ab"], [False, True], ), ) - def test_ternary_einsum(self, compute_unit, backend, equation, dynamic): + def test_ternary_einsum(self, compute_unit, backend, frontend, equation, dynamic): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch einsum decomposition issue") + class TestTernaryEinsum(nn.Module): def forward(self, x, y, z): return torch.einsum(equation, x, y, z) @@ -4995,6 +5166,7 @@ def forward(self, x, y, z): self.run_compare_torch( input_shapes, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=True, @@ -5002,13 +5174,14 @@ def forward(self, x, y, z): ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_einsum_with_same_input(self, compute_unit, backend): + def test_einsum_with_same_input(self, compute_unit, backend, frontend): class Einsum(nn.Module): def forward(self, m1, m2, m3): y1 = torch.einsum("bnhd,bdhm->bnhm", m1, m2) @@ -5024,6 +5197,7 @@ def forward(self, m1, m2, m3): self.run_compare_torch( [m1, m2, m3], Einsum(), + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -5108,9 +5282,7 @@ def test_cumsum(self, compute_unit, backend, axis): input_shape = list(np.random.randint(low=2, high=10, size=4)) input_shape = tuple(input_shape) model = ModuleWrapper(function=torch.cumsum, kwargs={"dim": axis}) - self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(input_shape, model, backend=backend, compute_unit=compute_unit) class TestReshape(TorchBaseTest): @@ -5165,17 +5337,18 @@ def test_reshape_scalar(self, compute_unit, backend, frontend, minimum_deploymen class TestReshapeAs(TorchBaseTest): @pytest.mark.parametrize( - 
"compute_unit, backend, input_output_shape", + "compute_unit, backend, frontend, input_output_shape", itertools.product( compute_units, backends, + frontends, [ ((6, 1, 1), (3, 2)), ((8,), (2, 1, 1, 2, 2)), ], ), ) - def test_reshape(self, compute_unit, backend, input_output_shape): + def test_reshape(self, compute_unit, backend, frontend, input_output_shape): class Model(nn.Module): def forward(self, x, ref): return x.reshape_as(ref) @@ -5185,6 +5358,7 @@ def forward(self, x, ref): self.run_compare_torch( [input_shape, output_shape], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -5192,10 +5366,10 @@ def forward(self, x, ref): class TestFlatten(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, start_dim, end_dim, is_dynamic", - itertools.product(compute_units, backends, [2, -2, 0], [3, -1], [False, True]), + "compute_unit, backend, frontend, start_dim, end_dim, is_dynamic", + itertools.product(compute_units, backends, frontends, [2, -2, 0], [3, -1], [False, True]), ) - def test_flatten(self, compute_unit, backend, start_dim, end_dim, is_dynamic): + def test_flatten(self, compute_unit, backend, frontend, start_dim, end_dim, is_dynamic): input_shape = (2, 3, 4, 5) converter_input_type = None if is_dynamic: @@ -5217,6 +5391,7 @@ def test_flatten(self, compute_unit, backend, start_dim, end_dim, is_dynamic): self.run_compare_torch( input_shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, converter_input_type=converter_input_type, @@ -5225,16 +5400,17 @@ def test_flatten(self, compute_unit, backend, start_dim, end_dim, is_dynamic): class TestUnflatten(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, dim, auto_infer_idx, dynamic", + "compute_unit, backend, frontend, dim, auto_infer_idx, dynamic", itertools.product( compute_units, backends, + frontends, (0, 1, -1, -2), (0, 1, None), (True, False), ), ) - def test_unflatten(self, compute_unit, backend, dim, auto_infer_idx, dynamic): + def test_unflatten(self, compute_unit, backend, frontend, dim, auto_infer_idx, dynamic): if dynamic and auto_infer_idx is not None: pytest.skip("Auto-inferring shape (-1) not supported for dynamic input.") @@ -5273,6 +5449,7 @@ def forward(self, x): (NHEAD * BATCH_SIZE, NHEAD * INPUT_SIZE), Head(NHEAD, BATCH_SIZE, INPUT_SIZE, OUTPUT_SIZE), converter_input_type=inputs, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -5280,12 +5457,12 @@ def forward(self, x): class TestGather(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank_and_axis", + "compute_unit, backend, frontend, rank_and_axis", itertools.product( - compute_units, backends, [(i, j) for i in range(1, 6) for j in range(0, i)] + compute_units, backends, frontends, [(i, j) for i in range(1, 6) for j in range(0, i)] ), ) - def test_gather_along_axis(self, compute_unit, backend, rank_and_axis): + def test_gather_along_axis(self, compute_unit, backend, frontend, rank_and_axis): rank, axis = rank_and_axis params_shape = np.random.randint(low=2, high=5, size=rank) indices_shape = np.copy(params_shape) @@ -5296,13 +5473,15 @@ def test_gather_along_axis(self, compute_unit, backend, rank_and_axis): function=torch.gather, kwargs={"dim": axis, "index": torch.from_numpy(indices)}, ) - self.run_compare_torch([params_shape], model, backend=backend, compute_unit=compute_unit) + self.run_compare_torch( + [params_shape], model, compute_unit=compute_unit, backend=backend, frontend=frontend + ) @pytest.mark.parametrize( - "compute_unit, backend, 
input_enumerated_shape", - itertools.product(compute_units, backends, (True, False)), + "compute_unit, backend, frontend, input_enumerated_shape", + itertools.product(compute_units, backends, frontends, (True, False)), ) - def test_gather_enumerated_shape(self, compute_unit, backend, input_enumerated_shape): + def test_gather_enumerated_shape(self, compute_unit, backend, frontend, input_enumerated_shape): axis = 0 params_shape = (2, 3, 4) indices_shape = (3, 3, 4) @@ -5330,6 +5509,7 @@ def forward(self, x, index): Model(), input_as_shape=False, converter_input_type=converter_input_type, + frontend=frontend, backend=backend, compute_unit=compute_unit, minimum_deployment_target=ct.target.iOS17, @@ -5345,24 +5525,47 @@ def test_gather_along_axis_invalid_indices(self): torch.gather(data, 1, torch.tensor([[0, 0], [2, 0]])) @pytest.mark.parametrize( - "compute_unit, backend, dynamic", - itertools.product(compute_units, backends, [True, False]), + "compute_unit, backend, frontend, dynamic", + itertools.product(compute_units, backends, frontends, [True, False]), ) - def test_gather_nd_int16_indices(self, compute_unit, backend, dynamic): + def test_gather_nd_int16_indices(self, compute_unit, backend, frontend, dynamic): """Test the indices access in torch model which gets lowered to gather_nd.""" B, C, H, W, T = 1, 24, 64, 64, 32 data = torch.rand(B, C, H, W) time = (torch.rand(1, T) * (C - 1)).to(torch.int) - class DynamicModel(torch.nn.Module): - def forward(self, data, time): - return data[torch.arange(B).unsqueeze(1), time, :, :] + if frontend == TorchFrontend.TORCHSCRIPT: + + class DynamicModel(torch.nn.Module): + def forward(self, data, time): + return data[torch.arange(B).unsqueeze(1), time, :, :] + + class StaticModel(torch.nn.Module): + def forward(self, data): + return data[torch.arange(B).unsqueeze(1), time, :, :] + + torch_model = DynamicModel() if dynamic else StaticModel() + else: + + class DynamicModel(torch.nn.Module): + def __init__(self, B): + super().__init__() + self.slice0 = torch.arange(B).unsqueeze(1) + + def forward(self, data, time): + return data[self.slice0, time, :, :] + + class StaticModel(torch.nn.Module): + def __init__(self, B, time): + super().__init__() + self.slice0 = torch.arange(B).unsqueeze(1) + self.time = time + + def forward(self, data): + return data[self.slice0, self.time, :, :] - class StaticModel(torch.nn.Module): - def forward(self, data): - return data[torch.arange(B).unsqueeze(1), time, :, :] + torch_model = DynamicModel(B) if dynamic else StaticModel(B, time) - torch_model = DynamicModel() if dynamic else StaticModel() input_data = (data, time) if dynamic else data converter_input_type = [ct.TensorType(shape=data.shape)] if dynamic: @@ -5373,6 +5576,7 @@ def forward(self, data): torch_model, input_as_shape=False, converter_input_type=converter_input_type, + frontend=frontend, backend=backend, compute_unit=compute_unit, minimum_deployment_target=ct.target.iOS17, @@ -5631,20 +5835,11 @@ def test_softplus(self, compute_unit, backend, beta, threshold, minimum_deployme @pytest.mark.parametrize( "compute_unit, backend, shape", - itertools.product( - compute_units, - backends, - COMMON_SHAPES_ALL - ), + itertools.product(compute_units, backends, COMMON_SHAPES_ALL), ) def test_mish(self, compute_unit, backend, shape): model = nn.Mish().eval() - self.run_compare_torch( - shape, - model, - backend=backend, - compute_unit=compute_unit - ) + self.run_compare_torch(shape, model, backend=backend, compute_unit=compute_unit) @pytest.mark.parametrize( "compute_unit, 
backend, shape", @@ -5668,12 +5863,12 @@ def test_silu(self, compute_unit, backend, shape): @pytest.mark.parametrize( "compute_unit, backend, rounding_mode, x2_type", - itertools.product(compute_units, backends, [None, "floor", "trunc"], [np.float32, np.int32]), + itertools.product( + compute_units, backends, [None, "floor", "trunc"], [np.float32, np.int32] + ), ) def test_div(self, compute_unit, backend, rounding_mode, x2_type): - model = ModuleWrapper( - function=torch.div, kwargs={"rounding_mode": rounding_mode} - ) + model = ModuleWrapper(function=torch.div, kwargs={"rounding_mode": rounding_mode}) x1 = torch.from_numpy(np.array([2.3, 2.6, -3.6, -3.2], dtype=np.float32)) x2 = torch.from_numpy(np.array([1.0, 1.0, 1.0, 1.0], dtype=x2_type)) out = torch.div(x1, x2, rounding_mode=rounding_mode) @@ -5689,10 +5884,11 @@ def test_div(self, compute_unit, backend, rounding_mode, x2_type): class TestElementWiseUnary(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape, op_string", + "compute_unit, backend, frontend, shape, op_string", itertools.product( compute_units, backends, + frontends, [(1, 3, 5, 8)], [ "abs", @@ -5715,7 +5911,7 @@ class TestElementWiseUnary(TorchBaseTest): ], ), ) - def test_elementwise_no_params(self, compute_unit, backend, shape, op_string): + def test_elementwise_no_params(self, compute_unit, backend, frontend, shape, op_string): if not contains_op(torch, op_string): return if op_string == "sqrt" and compute_unit != ct.ComputeUnit.CPU_ONLY: @@ -5723,13 +5919,16 @@ def test_elementwise_no_params(self, compute_unit, backend, shape, op_string): op_func = getattr(torch, op_string) model = ModuleWrapper(function=op_func) - self.run_compare_torch(shape, model, backend=backend, compute_unit=compute_unit) + self.run_compare_torch( + shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend + ) @pytest.mark.parametrize( - "compute_unit, backend, shape, clamp_range, minimum_deployment_target", + "compute_unit, backend, frontend, shape, clamp_range, minimum_deployment_target", itertools.product( compute_units, backends, + frontends, [(1, 3, 5, 8)], [ (0.0, 1.0), @@ -5744,7 +5943,9 @@ def test_elementwise_no_params(self, compute_unit, backend, shape, op_string): [None, ct.target.iOS17], ), ) - def test_clamp(self, compute_unit, backend, shape, clamp_range, minimum_deployment_target): + def test_clamp( + self, compute_unit, backend, frontend, shape, clamp_range, minimum_deployment_target + ): params_dict = {} if clamp_range[0] is not None: params_dict["min"] = clamp_range[0] @@ -5755,6 +5956,7 @@ def test_clamp(self, compute_unit, backend, shape, clamp_range, minimum_deployme self.run_compare_torch( shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, rand_range=(-5, 5), @@ -5762,19 +5964,21 @@ def test_clamp(self, compute_unit, backend, shape, clamp_range, minimum_deployme ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_clamp_int_input(self, compute_unit, backend): + def test_clamp_int_input(self, compute_unit, backend, frontend): params_dict = {"min": -2, "max": 2} input_data = torch.randint(low=-5, high=5, size=(2, 3, 4)) model = ModuleWrapper(torch.clamp, params_dict) self.run_compare_torch( input_data, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -5782,19 +5986,21 @@ def test_clamp_int_input(self, compute_unit, backend): ) @pytest.mark.parametrize( 
- "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_clamp_min_int(self, compute_unit, backend): + def test_clamp_min_int(self, compute_unit, backend, frontend): params_dict = {"min": 0} input_data = torch.randint(low=-5, high=5, size=(2, 3, 4)) model = ModuleWrapper(torch.clamp_min, params_dict) self.run_compare_torch( input_data, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -5802,35 +6008,40 @@ def test_clamp_min_int(self, compute_unit, backend): ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_clamp_min_float(self, compute_unit, backend): + def test_clamp_min_float(self, compute_unit, backend, frontend): params_dict = {"min": 0.0} input_data = torch.randn((2, 3, 4)) model = ModuleWrapper(torch.clamp_min, params_dict) self.run_compare_torch( input_data, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, ) @pytest.mark.parametrize( - "compute_unit, backend, shape, threshold, minimum_deployment_target", + "compute_unit, backend, frontend, shape, threshold, minimum_deployment_target", itertools.product( compute_units, backends, + frontends, [(1, 3, 5, 8)], [(0.0, 0.0), (0.5, 0.5), (0.5, 10), (0.9, 0.0)], [None, ct.target.iOS17], ), ) - def test_threshold(self, compute_unit, backend, shape, threshold, minimum_deployment_target): + def test_threshold( + self, compute_unit, backend, frontend, shape, threshold, minimum_deployment_target + ): model = torch.nn.Threshold(threshold[0], threshold[1]).eval() input_value = torch.rand(np.prod(shape)) # make sure the values are not too close to the threshold @@ -5841,6 +6052,7 @@ def test_threshold(self, compute_unit, backend, shape, threshold, minimum_deploy self.run_compare_torch( input_value, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -5848,10 +6060,11 @@ def test_threshold(self, compute_unit, backend, shape, threshold, minimum_deploy ) @pytest.mark.parametrize( - "compute_unit, backend, shape, op_string", + "compute_unit, backend, frontend, shape, op_string", itertools.product( compute_units, backends, + frontends, [(1, 3, 5, 8)], [ "log", @@ -5861,29 +6074,29 @@ def test_threshold(self, compute_unit, backend, shape, threshold, minimum_deploy ), ) def test_elementwise_numerically_stable( - self, compute_unit, backend, shape, op_string + self, compute_unit, backend, frontend, shape, op_string ): op_func = getattr(torch, op_string) model = ModuleWrapper(function=op_func) self.run_compare_torch( shape, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, rand_range=(20, 100), ) @pytest.mark.parametrize( - "compute_unit, backend, dtype", + "compute_unit, backend, frontend, dtype", itertools.product( compute_units, backends, + frontends, [np.int32, np.float32], ), ) - def test_log_dtype( - self, compute_unit, backend, dtype - ): + def test_log_dtype(self, compute_unit, backend, frontend, dtype): SHAPE = (2, 3) input_data = np.random.randint(1, 100, SHAPE).astype(dtype) @@ -5894,56 +6107,58 @@ def test_log_dtype( self.run_compare_torch( input_data, model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, - converter_input_type=converter_input_type + converter_input_type=converter_input_type, ) class TestAtan2(TorchBaseTest): @pytest.mark.parametrize( - 
"compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_atan2(self, compute_unit, backend, rank): + def test_atan2(self, compute_unit, backend, frontend, rank): model = ModuleWrapper(function=torch.atan2) input_shape = tuple(np.random.randint(low=1, high=10, size=rank)) - TorchBaseTest.run_compare_torch( + self.run_compare_torch( [input_shape, input_shape], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, - input_as_shape=True, ) @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_atan2_x0(self, compute_unit, backend, rank): + def test_atan2_x0(self, compute_unit, backend, frontend, rank): model = ModuleWrapper(function=torch.atan2) input_shape = tuple(np.random.randint(low=1, high=10, size=rank)) y = generate_input_data(input_shape, rand_range=(-1.0, 1.0)) x = torch.zeros(input_shape) - TorchBaseTest.run_compare_torch( + self.run_compare_torch( (y, x), model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, ) @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_atan2_y0x0(self, compute_unit, backend, rank): + def test_atan2_y0x0(self, compute_unit, backend, frontend, rank): model = ModuleWrapper(function=torch.atan2) input_shape = tuple(np.random.randint(low=1, high=10, size=rank)) y = torch.zeros(input_shape) x = torch.zeros(input_shape) - TorchBaseTest.run_compare_torch( + self.run_compare_torch( (y, x), model, backend=backend, @@ -5952,60 +6167,64 @@ def test_atan2_y0x0(self, compute_unit, backend, rank): ) @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_atan2_broadcast(self, compute_unit, backend, rank): + def test_atan2_broadcast(self, compute_unit, backend, frontend, rank): model = ModuleWrapper(function=torch.atan2) input_shape = tuple(np.random.randint(low=1, high=10, size=rank)) truncated_shape = list(input_shape) while len(truncated_shape) > 1: truncated_shape.pop(0) - TorchBaseTest.run_compare_torch( + self.run_compare_torch( [input_shape, truncated_shape], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, - input_as_shape=True, ) - TorchBaseTest.run_compare_torch( + self.run_compare_torch( [truncated_shape, input_shape], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, - input_as_shape=True, ) class TestTriu(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape, diagonal", + "compute_unit, backend, frontend, shape, diagonal", itertools.product( compute_units, backends, + frontends, [(5, 5), (3, 4), (5, 1)], [None, -1, 0, 2], ), ) - def test_triu(self, compute_unit, backend, shape, diagonal): + def test_triu(self, compute_unit, backend, frontend, shape, diagonal): params_dict = {} if diagonal is not None: params_dict["diagonal"] = diagonal model = ModuleWrapper(torch.triu, params_dict) - self.run_compare_torch(shape, model, 
backend=backend, compute_unit=compute_unit) + self.run_compare_torch( + shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend + ) class TestTril(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape, diagonal", + "compute_unit, backend, frontend, shape, diagonal", itertools.product( compute_units, backends, + frontends, [(5, 5), (3, 4), (5, 1)], [None, -1, 0, 2], ), ) - def test_tril(self, compute_unit, backend, shape, diagonal): + def test_tril(self, compute_unit, backend, frontend, shape, diagonal): params_dict = {} if diagonal is not None: params_dict["diagonal"] = diagonal @@ -6013,38 +6232,39 @@ def test_tril(self, compute_unit, backend, shape, diagonal): self.run_compare_torch( shape, model, - backend=backend, compute_unit=compute_unit, + backend=backend, + frontend=frontend, ) class TestMatMul(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_bmm(self, compute_unit, backend): + def test_bmm(self, compute_unit, backend, frontend): shape_x, shape_y = (3, 4, 5), (3, 5, 6) model = ModuleWrapper(function=torch.bmm) self.run_compare_torch( - [shape_x, shape_y], model, backend=backend, compute_unit=compute_unit + [shape_x, shape_y], model, compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_bmm_with_fp16_inputs(self, compute_unit, backend): - if backend == ("mlprogram", "fp16"): - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) + def test_bmm_with_fp16_inputs(self, compute_unit, backend, frontend): + if platform.machine() == "x86_64" and ct.utils._macos_version() <= (14, 2): + pytest.xfail("rdar://135925921 ([CI] Upgrade External CI Machine OS)") class TestModel(torch.nn.Module): def forward(self, x, y): @@ -6060,6 +6280,7 @@ def forward(self, x, y): self.run_compare_torch( inputs, TestModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, minimum_deployment_target=ct.target.iOS16, @@ -6069,14 +6290,15 @@ def forward(self, x, y): class TestNumel(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_shape", + "compute_unit, backend, frontend, input_shape", itertools.product( compute_units, backends, + frontends, [(1,), (2, 3)], ), ) - def test_numel(self, compute_unit, backend, input_shape): + def test_numel(self, compute_unit, backend, frontend, input_shape): class TestModel(torch.nn.Module): def forward(self, x): res = torch.numel(x) @@ -6084,44 +6306,47 @@ def forward(self, x): model = TestModel() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestSplit(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, split_size_or_sections, dim", - itertools.product(compute_units, backends, [1, 2, [1, 4]], [0, -2]), + "compute_unit, backend, frontend, split_size_or_sections, dim", + itertools.product(compute_units, backends, frontends, [1, 2, [1, 4]], [0, -2]), ) - def test_split(self, compute_unit, backend, split_size_or_sections, dim): + def test_split(self, compute_unit, backend, frontend, split_size_or_sections, dim): input_shape = (5, 2) model = ModuleWrapper( function=torch.split, kwargs={"split_size_or_sections": 
split_size_or_sections, "dim": dim}, ) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, frontend=frontend, backend=backend, compute_unit=compute_unit ) @pytest.mark.parametrize( - "compute_unit, backend, split_sizes, dim", - itertools.product(compute_units, backends, [[1, 4], [3, 2]], [-1, -2]), + "compute_unit, backend, frontend, split_sizes, dim", + itertools.product(compute_units, backends, frontends, [[1, 4], [3, 2]], [-1, -2]), ) - def test_split_with_sizes(self, compute_unit, backend, split_sizes, dim): + def test_split_with_sizes(self, compute_unit, backend, frontend, split_sizes, dim): input_shape = (5, 5) model = ModuleWrapper( function=torch.split_with_sizes, kwargs={"split_sizes": split_sizes, "dim": dim}, ) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, frontend=frontend, backend=backend, compute_unit=compute_unit ) @pytest.mark.parametrize( - "compute_unit, backend, dim", - itertools.product(compute_units, backends, [-1]), + "compute_unit, backend, frontend, dim", + itertools.product(compute_units, backends, frontends, [-1]), ) - def test_split_with_dynamic_sizes(self, compute_unit, backend, dim): + def test_split_with_dynamic_sizes(self, compute_unit, backend, frontend, dim): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip("Torch.Export cannot export dynamic sizes") + class TestModel(torch.nn.Module): def forward(self, x): size = x[0] @@ -6136,6 +6361,7 @@ def forward(self, x): model, expected_results=torch_out, input_as_shape=False, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -6155,6 +6381,7 @@ def forward(self, x): model, expected_results=torch_out, input_as_shape=False, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -6162,44 +6389,45 @@ def forward(self, x): class TestUnbind(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, dim", - itertools.product(compute_units, backends, [0, 1, 2]), + "compute_unit, backend, frontend, dim", + itertools.product(compute_units, backends, frontends, [0, 1, 2]), ) - def test_unbind(self, compute_unit, backend, dim): + def test_unbind(self, compute_unit, backend, frontend, dim): input_shape = (3, 3, 4) model = ModuleWrapper(function=torch.unbind, kwargs={"dim": dim}) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_unbind_one_dim_shape(self, compute_unit, backend): + def test_unbind_one_dim_shape(self, compute_unit, backend, frontend): input_shape = (1,) dim = 0 model = ModuleWrapper(function=torch.unbind, kwargs={"dim": dim}) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestTranspose(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shape, dims", + "compute_unit, backend, frontend, shape, dims", itertools.product( - compute_units, backends, COMMON_SHAPES, [(0, 1), (-2, -1), (1, 0), (-1, -2)] + compute_units, backends, frontends, COMMON_SHAPES, [(0, 1), (-2, -1), (1, 0), (-1, -2)] ), ) - def test(self, compute_unit, backend, shape, dims): - model = ModuleWrapper( - function=torch.transpose, kwargs={"dim0": 
dims[0], "dim1": dims[1]} + def test(self, compute_unit, backend, frontend, shape, dims): + model = ModuleWrapper(function=torch.transpose, kwargs={"dim0": dims[0], "dim1": dims[1]}) + self.run_compare_torch( + shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) - self.run_compare_torch(shape, model, backend=backend, compute_unit=compute_unit) class TestTo(TorchBaseTest): @@ -6289,11 +6517,11 @@ def forward(self, input_data): ) def test_to_no_param(self, compute_unit, backend: Tuple[str], input_type): if input_type == np.float16 and backend[0] == "neuralnetwork": + pytest.skip("Input float16 needs target >= iOS16, which doesn't support neuralnetwork.") + if input_type == np.float16 and _macos_version() < (13, 0): pytest.skip( - "Input float16 needs target >= iOS16, which doesn't support neuralnetwork." + "Input float16 needs target >= iOS16, which is not available until macOS 13." ) - if input_type == np.float16 and _macos_version() < (13, 0): - pytest.skip("Input float16 needs target >= iOS16, which is not available until macOS 13.") class TestModel(torch.nn.Module): def forward(self, input_data): @@ -6342,10 +6570,7 @@ def forward(self, x): lambda var: var.op and "range_1d" in var.op.op_type ) mlmodel = self.run_compare_torch( - [(1, 2, 3)], - model, - backend=backend, - compute_unit=compute_unit + [(1, 2, 3)], model, backend=backend, compute_unit=compute_unit ) prog = mlmodel[1]._mil_program # The range_1d op translated from `torch.arange` shouldn't be folded. @@ -6362,7 +6587,7 @@ class TestSlice(TorchBaseTest): def test_slice(self, compute_unit, backend, frontend, start, end, step): class SliceModel(torch.nn.Module): def forward(self, x): - y = x[start : end : step] + y = x[start:end:step] return y model = SliceModel() @@ -6382,7 +6607,7 @@ def forward(self, x): ), ) def test_dynamic_slice(self, compute_unit, backend, frontend): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2189: " "torch.export Cannot Use Dynamic Index to Slice" @@ -6405,9 +6630,7 @@ def forward(self, tokens, context, context_length): tokens_embeddings = self.tokens_embedding(tokens) context_embeddings = self.context_embedding(context) embeddings = torch.cat((context_embeddings, tokens_embeddings), dim=0) - embeddings = self.dynamic_slicer( - embeddings, torch.squeeze(context_length) - ) + embeddings = self.dynamic_slicer(embeddings, torch.squeeze(context_length)) return embeddings @@ -6430,41 +6653,48 @@ def forward(self, tokens, context, context_length): class TestRepeat(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_repeat(self, compute_unit, backend, rank): + def test_repeat(self, compute_unit, backend, frontend, rank): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip("ectedly found a in the inputs") + input_shape = np.random.randint(low=2, high=6, size=rank) repeats = np.random.randint(low=2, high=4, size=rank) input_shape = tuple(input_shape) model = ModuleWrapper(function=lambda x: x.repeat(*repeats)) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, backend=backend, compute_unit=compute_unit, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, 
backends, (1, 2)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, (1, 2)), ) - def test_repeats_with_extra_dimensions(self, compute_unit, backend, rank): + def test_repeats_with_extra_dimensions(self, compute_unit, backend, frontend, rank): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip("unexpectedly found a in the inputs") + input_shape = np.random.randint(low=2, high=6, size=rank) for num_extra_dims in (1, 2): repeats = np.random.randint(low=2, high=4, size=rank + num_extra_dims) model = ModuleWrapper(function=lambda x: x.repeat(*repeats)) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, backend=backend, compute_unit=compute_unit, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_repeats_with_enumerated_shape_case1(self, compute_unit, backend): + def test_repeats_with_enumerated_shape_case1(self, compute_unit, backend, frontend): class Model(nn.Module): def forward(self, x, y): reps = x.size(0) @@ -6484,16 +6714,18 @@ def forward(self, x, y): ], backend=backend, compute_unit=compute_unit, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_repeats_with_enumerated_shape_case2(self, compute_unit, backend): + def test_repeats_with_enumerated_shape_case2(self, compute_unit, backend, frontend): class Model(nn.Module): def forward(self, x, y): return y.repeat(x.size(0), x.size(1)) @@ -6511,16 +6743,18 @@ def forward(self, x, y): ], backend=backend, compute_unit=compute_unit, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_repeats_with_symbolic_shape(self, compute_unit, backend): + def test_repeats_with_symbolic_shape(self, compute_unit, backend, frontend): class Model(nn.Module): def forward(self, x, y): return y.repeat([x.shape[-1], 1, x.shape[0]]) @@ -6543,29 +6777,69 @@ def forward(self, x, y): ], backend=backend, compute_unit=compute_unit, + frontend=frontend, ) class TestRepeatInterleave(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank, repeat", + "compute_unit, backend, frontend, rank, dim, repeat", itertools.product( compute_units, backends, + frontends, (1, 3, 5), - (2, torch.tensor(3), torch.tensor([4])), + (None, 0, 1, 2, 3, 4), + (1, torch.tensor(1), torch.tensor([1]), 2, torch.tensor(3), torch.tensor([4])), ), ) - def test_scalar_repeat(self, compute_unit, backend, rank, repeat): + def test_scalar_repeat(self, compute_unit, backend, frontend, rank, dim, repeat): + if dim is not None and dim >= rank: + pytest.skip() + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.repeat_interleave.Tensor is not Aten Canonical") + input_shape = tuple(np.random.randint(low=1, high=6, size=rank)) - for dim in [None] + [*range(rank)]: - model = ModuleWrapper(function=lambda x: x.repeat_interleave(repeat, dim=dim)) - self.run_compare_torch(input_shape, model, backend=backend, compute_unit=compute_unit) + model = ModuleWrapper(function=lambda x: x.repeat_interleave(repeat, dim=dim)) + + mlmodel = self.run_compare_torch( + input_shape, + model, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + )[1] + # when repeat 
= 1, repeat_interelave is a noop + if repeat in (1, torch.tensor(1), torch.tensor([1])): + assert get_op_types_in_program(mlmodel._mil_program) in ( + ["identity"], + ["identity", "identity"], + ["cast", "cast"], + ["reshape"], + ["cast", "reshape", "cast"], + ) + + @pytest.mark.parametrize( + "compute_unit, backend, frontend", + itertools.product( + compute_units, + backends, + frontends, + ), + ) + def test_single_fill_tensor_repeat(self, compute_unit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.repeat_interleave.Tensor is not Aten Canonical") - def test_single_fill_tensor_repeat(self): input_shape = (3, 2) model = ModuleWrapper(function=lambda x: x.repeat_interleave(torch.tensor([2, 2]), dim=1)) - self.run_compare_torch(input_shape, model) + self.run_compare_torch( + input_shape, + model, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + ) def test_unsupported_tensor_repeat(self): input_shape = (4, 1, 3) @@ -6578,6 +6852,52 @@ def test_unsupported_tensor_repeat(self): ): self.run_compare_torch(input_shape, model) + @pytest.mark.parametrize( + "compute_unit, backend, frontend, dim", + itertools.product( + compute_units, + backends, + frontends, + (None, -4, -3, -2, -1), + ), + ) + def test_dynamic(self, compute_unit, backend, frontend, dim): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch size op does not work on FakeTensor") + if platform.machine() == "x86_64": + pytest.xfail("rdar://135843153 ([Bug] Models failed on x86_64 platform)") + + input_shape = (2, 3, 5, 7) + + class Model(torch.nn.Module): + def forward(self, x): + return x.repeat_interleave(2, dim=dim) + + model = Model() + + torch_export_dynamic_shapes = None + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + batch_dim = torch.export.Dim(name="batch_dim", max=128) + sequence_length = torch.export.Dim(name="sequence_length", max=256) + torch_export_dynamic_shapes = {"x": {0: batch_dim, 2: sequence_length}} + + converter_input_type = None + if frontend == TorchFrontend.TORCHSCRIPT: + batch_dim = RangeDim(lower_bound=2, upper_bound=128) + sequence_length = RangeDim(lower_bound=2, upper_bound=256) + input_symbolic_shape = (batch_dim, 3, sequence_length, 7) + converter_input_type = [TensorType(shape=input_symbolic_shape)] + + self.run_compare_torch( + input_shape, + model, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, + torch_export_dynamic_shapes=torch_export_dynamic_shapes, + converter_input_type=converter_input_type, + ) + class TestStd(TorchBaseTest): @pytest.mark.parametrize( @@ -6680,30 +7000,54 @@ def forward(self, x): class TestFill(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank, dynamic, fill_scalar, src_dtype", + "compute_unit, backend, frontend, rank, dynamic, fill_scalar, src_dtype", itertools.product( compute_units, backends, + frontends, [1, 3], [False, True], [0.2, torch.tensor(float("-inf")), torch.tensor(2)], [torch.int32, torch.float32], ), ) - def test_fill_(self, compute_unit, backend, rank, dynamic, fill_scalar, src_dtype): + def test_fill_(self, compute_unit, backend, frontend, rank, dynamic, fill_scalar, src_dtype): if src_dtype == torch.int32 and fill_scalar == torch.tensor(float("-inf")): pytest.skip("float(-inf) cannot be casted to int.") + if ( + backend[0] == "neuralnetwork" + and fill_scalar == 0.2 + and src_dtype == torch.int32 + and frontend in TORCH_EXPORT_BASED_FRONTENDS + ): + pytest.xfail("rdar://133816197 Cast mb.fill output dtype to EXIR specification") 
input_shape = np.random.randint(low=2, high=6, size=rank) input_shape = tuple(input_shape) - class FillModel(nn.Module): - def forward(self, x): - y = torch.empty(x.shape, dtype=src_dtype) - y.fill_(fill_scalar) - return y + if frontend == TorchFrontend.TORCHSCRIPT: + + class FillModel(nn.Module): + def forward(self, x): + y = torch.empty(x.shape, dtype=src_dtype) + y.fill_(fill_scalar) + return y + + model = FillModel() + else: + + class FillModel(nn.Module): + def __init__(self, fill_scalar): + super().__init__() + self.fill_scalar = fill_scalar + + def forward(self, x): + y = torch.empty(x.shape, dtype=src_dtype) + y.fill_(self.fill_scalar) + return y + + model = FillModel(fill_scalar) - model = FillModel() if dynamic: upper_bound = 10 if backend[0] == "mlprogram" else -1 if rank == 1: @@ -6731,35 +7075,60 @@ def forward(self, x): input_shape, model, converter_input_type=converter_input_type, + compute_unit=compute_unit, backend=backend, - compute_unit=compute_unit + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend, rank, dynamic, fill_scalar, src_dtype", + "compute_unit, backend, frontend, rank, dynamic, fill_scalar, src_dtype", itertools.product( compute_units, backends, + frontends, [1, 3], [False, True], [0.2, torch.tensor(float("-inf")), torch.tensor(2)], [torch.int32, torch.float32], ), ) - def test_fill__2(self, compute_unit, backend, rank, dynamic, fill_scalar, src_dtype): + def test_fill__2(self, compute_unit, backend, frontend, rank, dynamic, fill_scalar, src_dtype): if src_dtype == torch.int32 and fill_scalar == torch.tensor(float("-inf")): pytest.skip("float(-inf) cannot be casted to int.") + if ( + backend[0] == "neuralnetwork" + and fill_scalar == 0.2 + and src_dtype == torch.int32 + and frontend in TORCH_EXPORT_BASED_FRONTENDS + ): + pytest.xfail("rdar://133816197 Cast mb.fill output dtype to EXIR specification") input_shape = np.random.randint(low=2, high=6, size=rank) input_shape = tuple(input_shape) - class FillModel(nn.Module): - def forward(self, x): - y = torch.empty(x.shape, dtype=src_dtype) - y.fill_(fill_scalar) - return y + 1 + if frontend == TorchFrontend.TORCHSCRIPT: + + class FillModel(nn.Module): + def forward(self, x): + y = torch.empty(x.shape, dtype=src_dtype) + y.fill_(fill_scalar) + return y + 1 + + model = FillModel() + else: + + class FillModel(nn.Module): + def __init__(self, fill_scalar): + super().__init__() + self.fill_scalar = fill_scalar + + def forward(self, x): + y = torch.empty(x.shape, dtype=src_dtype) + y.fill_(self.fill_scalar) + return y + 1 + + model = FillModel(fill_scalar) - model = FillModel() if dynamic: upper_bound = 10 if backend[0] == "mlprogram" else -1 if rank == 1: @@ -6787,8 +7156,9 @@ def forward(self, x): input_shape, model, converter_input_type=converter_input_type, + compute_unit=compute_unit, backend=backend, - compute_unit=compute_unit + frontend=frontend, ) @@ -7136,10 +7506,10 @@ def forward(self, x, y): class TestLog10(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_log10(self, compute_unit, backend, rank): + def test_log10(self, compute_unit, backend, frontend, rank): class Log10Model(nn.Module): def forward(self, x): return torch.log10(x) @@ -7147,16 +7517,16 @@ def forward(self, x): input_shape = tuple(np.random.randint(low=1, high=10, size=rank)) model = Log10Model() 
self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestLog2(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_log2(self, compute_unit, backend, rank): + def test_log2(self, compute_unit, backend, frontend, rank): class Log2Model(nn.Module): def __init__(self): super(Log2Model, self).__init__() @@ -7167,38 +7537,41 @@ def forward(self, x): input_shape = tuple(np.random.randint(low=1, high=10, size=rank)) model = Log2Model() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestUnique(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, x, return_inverse, return_counts", + "compute_unit, backend, frontend, x, return_inverse, return_counts", itertools.product( compute_units, backends, + frontends, ( [1, 2, 3, 2, 2, 3, 99, -1, 1], [[1, 2, 3, 100], [3, 2, 99, 1]], ), (True, False), (True, False), - ) + ), ) - def test(self, compute_unit, backend, x, return_inverse, return_counts): + def test(self, compute_unit, backend, frontend, x, return_inverse, return_counts): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip("torch._dynamo.exc.Unsupported: dynamic shape operator: aten._unique2") + class Model(nn.Module): def forward(self, x): - return torch.unique( - x, return_inverse=return_inverse, return_counts=return_counts - ) + return torch.unique(x, return_inverse=return_inverse, return_counts=return_counts) - if backend[0] == 'neuralnetwork': + if backend[0] == "neuralnetwork": pytest.xfail("This op is only supported on mlprogram backend.") self.run_compare_torch( torch.Tensor(x), Model(), input_as_shape=False, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -7206,14 +7579,15 @@ def forward(self, x): class TestFlip(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank_dim", + "compute_unit, backend, frontend, rank_dim", itertools.product( compute_units, backends, + frontends, [(1, [0]), (2, [0, 1]), (3, [1]), (4, [0, 1, 2, 3])], ), ) - def test_flip(self, compute_unit, backend, rank_dim): + def test_flip(self, compute_unit, backend, frontend, rank_dim): rank, dim = rank_dim class FlipModel(nn.Module): @@ -7223,16 +7597,17 @@ def forward(self, x): input_shape = tuple(np.random.randint(low=1, high=10, size=rank)) model = FlipModel() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestBitWiseLogical(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, x_y, op_string", + "compute_unit, backend, frontend, x_y, op_string", itertools.product( compute_units, backends, + frontends, [ ([True, False, True, False], [True, True, False, False]), ([[True, False], [True, False]], [[True, True], [False, False]]), @@ -7246,7 +7621,7 @@ class TestBitWiseLogical(TorchBaseTest): ], ), ) - def test_bitwise_logical(self, compute_unit, backend, x_y, op_string): + def test_bitwise_logical(self, compute_unit, backend, frontend, x_y, op_string): if not contains_op(torch, op_string): return op_func = getattr(torch, op_string) @@ -7256,6 +7631,7 @@ def 
test_bitwise_logical(self, compute_unit, backend, x_y, op_string): self.run_compare_torch( [x, y], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -7264,10 +7640,11 @@ def test_bitwise_logical(self, compute_unit, backend, x_y, op_string): class TestLogicalAnd(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, x_y", + "compute_unit, backend, frontend, x_y", itertools.product( compute_units, backends, + frontends, [ ([True, False, True, False], [True, True, False, False]), ([[True, False], [True, False]], [[True, True], [False, False]]), @@ -7276,7 +7653,7 @@ class TestLogicalAnd(TorchBaseTest): ], ), ) - def test_logical_and(self, compute_unit, backend, x_y): + def test_logical_and(self, compute_unit, backend, frontend, x_y): class TestNet(nn.Module): def forward(self, x, y): return torch.logical_and(x, y) @@ -7287,6 +7664,7 @@ def forward(self, x, y): self.run_compare_torch( [x, y], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -7295,10 +7673,11 @@ def forward(self, x, y): class TestLogicalOr(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, x_y", + "compute_unit, backend, frontend, x_y", itertools.product( compute_units, backends, + frontends, [ ([True, False, True, False], [True, True, False, False]), ([[True, False], [True, False]], [[True, True], [False, False]]), @@ -7307,7 +7686,7 @@ class TestLogicalOr(TorchBaseTest): ], ), ) - def test_logical_or(self, compute_unit, backend, x_y): + def test_logical_or(self, compute_unit, backend, frontend, x_y): class TestNet(nn.Module): def forward(self, x, y): return torch.logical_or(x, y) @@ -7318,6 +7697,7 @@ def forward(self, x, y): self.run_compare_torch( [x, y], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -7326,10 +7706,11 @@ def forward(self, x, y): class TestLogicalXor(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, x_y", + "compute_unit, backend, frontend, x_y", itertools.product( compute_units, backends, + frontends, [ ([True, False, True, False], [True, True, False, False]), ([[True, False], [True, False]], [[True, True], [False, False]]), @@ -7338,7 +7719,7 @@ class TestLogicalXor(TorchBaseTest): ], ), ) - def test_logical_xor(self, compute_unit, backend, x_y): + def test_logical_xor(self, compute_unit, backend, frontend, x_y): class TestNet(nn.Module): def forward(self, x, y): return torch.logical_xor(x, y) @@ -7349,6 +7730,64 @@ def forward(self, x, y): self.run_compare_torch( [x, y], model, + frontend=frontend, + backend=backend, + compute_unit=compute_unit, + input_as_shape=False, + ) + + +class TestLogicalNot(TorchBaseTest): + @pytest.mark.parametrize( + "compute_unit, backend, frontend, input_dtype", + itertools.product( + compute_units, + backends, + frontends, + [torch.int32, torch.float32, torch.bool], + ), + ) + def test_logical_not(self, compute_unit, backend, frontend, input_dtype): + class TestModel(torch.nn.Module): + def forward(self, x): + return torch.logical_not(x) + + input_data = torch.randint( + low=0, high=2 if input_dtype == torch.bool else 4, size=(2, 3, 4), dtype=input_dtype + ) + self.run_compare_torch( + input_data, + TestModel(), + frontend=frontend, + backend=backend, + compute_unit=compute_unit, + input_as_shape=False, + ) + + @pytest.mark.parametrize( + "compute_unit, backend, frontend, input_dtype, output_dtype", + itertools.product( + compute_units, + backends, + frontends, + [torch.int32, 
torch.float32, torch.bool], + [torch.int16, torch.float16, torch.bool], + ), + ) + def test_logical_not_with_out(self, compute_unit, backend, frontend, input_dtype, output_dtype): + class TestModel(torch.nn.Module): + def forward(self, x): + out_tensor = torch.empty((2, 3, 4), dtype=output_dtype) + torch.logical_not(x, out=out_tensor) + return out_tensor + + input_data = torch.randint( + low=0, high=2 if input_dtype == torch.bool else 4, size=(2, 3, 4), dtype=input_dtype + ) + self.run_compare_torch( + input_data, + TestModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -7467,7 +7906,7 @@ def forward(self, cond, x, y): itertools.product(compute_units, backends, frontends, COMMON_SHAPES + [(10,)]), ) def test_where_single_param(self, compute_unit, backend, frontend, shape): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2183: " "Operator torch._ops.aten._assert_async.msg is not Aten Canonical" @@ -7533,7 +7972,7 @@ def forward(self, x): itertools.product(compute_units, backends, frontends) ) def test_dynamic_index(self, compute_unit, backend, frontend): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2189: " "torch.export Cannot Use Dynamic Index to Select" @@ -7545,11 +7984,11 @@ def forward(self, float_arr, int_arr): float_arr[dynamic_index] = 12.95 return float_arr - a = torch.Tensor([1., 2., 4., 5]) + a = torch.Tensor([1.0, 2.0, 4.0, 5]) i = torch.Tensor([0, 1, 2]).long() - inputs_types=[ + inputs_types = [ ct.TensorType(name="a", shape=a.shape), - ct.TensorType(name="i", shape=i.shape, dtype=np.int32) + ct.TensorType(name="i", shape=i.shape, dtype=np.int32), ] self.run_compare_torch( @@ -7559,14 +7998,16 @@ def forward(self, float_arr, int_arr): converter_input_type=inputs_types, frontend=frontend, backend=backend, - compute_unit=compute_unit + compute_unit=compute_unit, ) @pytest.mark.parametrize( "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends), ) - def test_dynamic_index_with_explicit_slice_on_all_other_dims(self, compute_unit, backend, frontend): + def test_dynamic_index_with_explicit_slice_on_all_other_dims( + self, compute_unit, backend, frontend + ): class SelectModel(torch.nn.Module): def forward(self, x, position): y = x[:, :, position] @@ -7585,15 +8026,18 @@ def forward(self, x, position): class TestNonZero(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank, as_tuple", + "compute_unit, backend, frontend, rank, as_tuple", itertools.product( compute_units, backends, + frontends, [1, 3], [False, True], ), ) - def test_non_zero(self, compute_unit, backend, rank, as_tuple): + def test_non_zero(self, compute_unit, backend, frontend, rank, as_tuple): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip("Cannot support _assert_async") if rank == 1: input_shape = 10 @@ -7616,6 +8060,7 @@ def test_non_zero(self, compute_unit, backend, rank, as_tuple): input, model, input_as_shape=False, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -7623,24 +8068,86 @@ def test_non_zero(self, compute_unit, backend, rank, as_tuple): class TestTorchTensor(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank", + "compute_unit, backend, frontend, rank", itertools.product( compute_units, backends, + frontends, [0, 1, 2, 3, 4, 5], ), ) - def 
test_torch_tensor(self, compute_unit, backend, rank): - class Model(nn.Module): - def __init__(self, rank): - super(Model, self).__init__() - self.rank = rank + def test_torch_tensor(self, compute_unit, backend, frontend, rank): + if frontend == TorchFrontend.TORCHSCRIPT: - def forward(self, x): - with torch.no_grad(): + class Model(nn.Module): + def __init__(self, rank): + super(Model, self).__init__() + self.rank = rank + + def forward(self, x): + with torch.no_grad(): + if self.rank == 0: + res = self.generate_tensor_rank_0(x) + return torch.unsqueeze(res, 0) + if self.rank == 1: + return self.generate_tensor_rank_1(x) + if self.rank == 2: + return self.generate_tensor_rank_2(x) + if self.rank == 3: + return self.generate_tensor_rank_3(x) + if self.rank == 4: + return self.generate_tensor_rank_4(x) + if self.rank == 5: + return self.generate_tensor_rank_5(x) + + @torch.jit.script + def generate_tensor_rank_0(x): + _, _, _, w = x.shape + return torch.tensor(w, dtype=torch.int32) + + @torch.jit.script + def generate_tensor_rank_1(x): + _, _, h, w = x.shape + return torch.tensor([h, w, 0, 1], dtype=torch.int32) + + @torch.jit.script + def generate_tensor_rank_2(x): + _, _, h, w = x.shape + return torch.tensor([[0, h], [h, w], [w, w]], dtype=torch.float32) + + @torch.jit.script + def generate_tensor_rank_3(x): + _, _, h, w = x.shape + return torch.tensor([[[h, 1]], [[3, w]]], dtype=torch.int32) + + @torch.jit.script + def generate_tensor_rank_4(x): + _, _, h, w = x.shape + return torch.tensor( + [ + [[[h, h], [h, w]], [[w, w], [w, 1]]], + [[[0, 0], [1, 1]], [[0, h], [h, w]]], + ], + dtype=torch.float32, + ) + + @torch.jit.script + def generate_tensor_rank_5(x): + _, _, h, w = x.shape + return torch.tensor( + [[[[[h, w], [w, w]], [[1, 1], [0, h]]]]], dtype=torch.float32 + ) + + else: + + class Model(nn.Module): + def __init__(self, rank): + super(Model, self).__init__() + self.rank = rank + + def forward(self, x): if self.rank == 0: - res = self.generate_tensor_rank_0(x) - return torch.unsqueeze(res, 0) + return self.generate_tensor_rank_0(x) if self.rank == 1: return self.generate_tensor_rank_1(x) if self.rank == 2: @@ -7652,53 +8159,50 @@ def forward(self, x): if self.rank == 5: return self.generate_tensor_rank_5(x) - @torch.jit.script - def generate_tensor_rank_0(x): - _, _, _, w = x.shape - return torch.tensor(w, dtype=torch.int32) - - @torch.jit.script - def generate_tensor_rank_1(x): - _, _, h, w = x.shape - return torch.tensor([h, w, 0, 1], dtype=torch.int32) - - @torch.jit.script - def generate_tensor_rank_2(x): - _, _, h, w = x.shape - return torch.tensor([[0, h], [h, w], [w, w]], dtype=torch.float32) - - @torch.jit.script - def generate_tensor_rank_3(x): - _, _, h, w = x.shape - return torch.tensor([[[h, 1]], [[3, w]]], dtype=torch.int32) - - @torch.jit.script - def generate_tensor_rank_4(x): - _, _, h, w = x.shape - return torch.tensor( - [ - [[[h, h], [h, w]], [[w, w], [w, 1]]], - [[[0, 0], [1, 1]], [[0, h], [h, w]]], - ], - dtype=torch.float32, - ) + def generate_tensor_rank_0(self, x): + _, _, _, w = x.shape + return torch.tensor(w, dtype=torch.int32) + + def generate_tensor_rank_1(self, x): + _, _, h, w = x.shape + return torch.tensor([h, w, 0, 1], dtype=torch.int32) + + def generate_tensor_rank_2(self, x): + _, _, h, w = x.shape + return torch.tensor([[0, h], [h, w], [w, w]], dtype=torch.float32) + + def generate_tensor_rank_3(self, x): + _, _, h, w = x.shape + return torch.tensor([[[h, 1]], [[3, w]]], dtype=torch.int32) + + def generate_tensor_rank_4(self, x): + _, _, h, w = 
x.shape + return torch.tensor( + [ + [[[h, h], [h, w]], [[w, w], [w, 1]]], + [[[0, 0], [1, 1]], [[0, h], [h, w]]], + ], + dtype=torch.float32, + ) - @torch.jit.script - def generate_tensor_rank_5(x): - _, _, h, w = x.shape - return torch.tensor( - [[[[[h, w], [w, w]], [[1, 1], [0, h]]]]], dtype=torch.float32 - ) + def generate_tensor_rank_5(self, x): + _, _, h, w = x.shape + return torch.tensor( + [[[[[h, w], [w, w]], [[1, 1], [0, h]]]]], dtype=torch.float32 + ) shape = (1, 1, 3, 4) model = Model(rank) - self.run_compare_torch(shape, model, backend=backend, compute_unit=compute_unit) + self.run_compare_torch( + shape, model, compute_unit=compute_unit, backend=backend, frontend=frontend + ) @pytest.mark.parametrize( - "compute_unit, backend, torch_op", + "compute_unit, backend, frontend, torch_op", itertools.product( compute_units, backends, + frontends, [ torch.abs, torch.acos, @@ -7725,7 +8229,10 @@ def generate_tensor_rank_5(x): ], ), ) - def test_torch_rank0_tensor(self, compute_unit, backend, torch_op): + def test_torch_rank0_tensor(self, compute_unit, backend, frontend, torch_op): + if frontend == TorchFrontend.EXECUTORCH and torch_op == torch.exp2: + pytest.skip("torch._ops.aten.exp2.default is not Aten Canonical") + class Model(nn.Module): def forward(self, x: torch.Tensor) -> torch.Tensor: return torch_op(torch.tensor(0.1)) @@ -7735,6 +8242,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: torch.tensor([1.0, 2.0, 3.0]), model, input_as_shape=False, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -7821,15 +8329,6 @@ def test_tensor_assign_case_broadcast( pytest.xfail( "rdar://128024502 ([Bug][iOS18] slice_update failing test on backends beside CPU_ONLY + Classic CPU)" ) - else: - if ( - backend == "mlprogram" - and shape == (5, 4, 3) - and minimum_deployment_target == ct.target.iOS18 - ): - pytest.xfail( - "rdar://128024502 ([Bug][iOS18] slice_update failing test on backends beside CPU_ONLY + Classic CPU)" - ) class TensorAssignModel(torch.nn.Module): def __init__(self): @@ -8010,16 +8509,16 @@ def test_tensor_assign_dynamic_slice( pytest.xfail( "rdar://128024502 ([Bug][iOS18] slice_update failing test on backends beside CPU_ONLY + Classic CPU)" ) - else: - # On BNNS, some cases are passing, only static cases are failing - if ( - backend[0] == "mlprogram" - and not dynamic - and minimum_deployment_target == ct.target.iOS18 - ): - pytest.xfail( - "rdar://128024502 ([Bug][iOS18] slice_update failing test on backends beside CPU_ONLY + Classic CPU)" - ) + + if ( + backend[0] == "mlprogram" + and not dynamic + and minimum_deployment_target == ct.target.iOS18 + ): + pytest.xfail( + "rdar://133494070 [iOS18] [Slice_Update] " + "Toy iOS18.slice_update Model Passes in BNNS but Dies in Core ML" + ) # general case with dynamic begin and end class TensorAssignModel(torch.nn.Module): @@ -8123,27 +8622,21 @@ def forward(self, x): class TestSelectScatter(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, minimum_deployment_target, input_shape", + "compute_unit, backend, frontend, minimum_deployment_target, input_shape", itertools.product( compute_units, backends, + frontends, [None, ct.target.iOS18], [(1,), (4,), (3, 4), (1, 2, 4)], ), ) - def test_select_scatter(self, compute_unit, backend, minimum_deployment_target, input_shape): + def test_select_scatter( + self, compute_unit, backend, frontend, minimum_deployment_target, input_shape + ): rank = len(input_shape) - if ( - input_shape == (1, 2, 4) - and minimum_deployment_target == ct.target.iOS18 - 
): - pytest.xfail( - "rdar://128024502 ([Bug][iOS18] slice_update failing test on backends beside CPU_ONLY + Classic CPU)" - ) - def test_model(src_shape, dim, index): - class SelectScatterModel(torch.nn.Module): def forward(self, x, y): return torch.select_scatter( @@ -8172,13 +8665,17 @@ def forward(self, x, y): res = self.run_compare_torch( [input_shape, src_shape], model, + frontend=frontend, backend=backend, compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, ) # check slice_update is used - if minimum_deployment_target == ct.target.iOS18: + if ( + minimum_deployment_target == ct.target.iOS18 + and frontend != TorchFrontend.EXECUTORCH + ): prog = res[1]._mil_program assert "slice_update" in get_op_types_in_program(prog) @@ -8192,15 +8689,18 @@ def forward(self, x, y): class TestSliceScatter(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, minimum_deployment_target, input_shape", + "compute_unit, backend, frontend, minimum_deployment_target, input_shape", itertools.product( compute_units, backends, + frontends, [None, ct.target.iOS18], [(1,), (4,), (3, 4), (1, 2, 4)], ), ) - def test_slice_scatter(self, compute_unit, backend, minimum_deployment_target, input_shape): + def test_slice_scatter( + self, compute_unit, backend, frontend, minimum_deployment_target, input_shape + ): rank = len(input_shape) def test_model(src_shape, dim, start, end, step): @@ -8218,6 +8718,7 @@ def forward(self, x, y): res = self.run_compare_torch( [input_shape, src_shape], SliceScatterModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, @@ -8251,7 +8752,7 @@ class TestIndexPut(TorchBaseTest): ), ) def test_index_put_bool_index_case_1(self, compute_unit, backend, frontend, minimum_deployment_target): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2183: " "Operator torch._ops.aten._assert_async.msg is not Aten Canonical" @@ -8287,7 +8788,10 @@ def forward(self, x, y): def test_index_put_bool_index_case_2( self, compute_unit, backend, frontend, rank, minimum_deployment_target ): - if backend[0] == "neuralnetwork" and frontend == TorchFrontend.EXIR: + if backend[0] == "neuralnetwork" and frontend in ( + TorchFrontend.TORCHEXPORT, + TorchFrontend.EXECUTORCH, + ): pytest.xfail( "https://github.com/apple/coremltools/issues/2185: " "EXIR IndexPut Fails on NeuralNetwork Backend" @@ -8296,7 +8800,40 @@ def test_index_put_bool_index_case_2( class IndexPutModel(torch.nn.Module): def forward(self, x): mask = torch.tensor([True, False, False, False, True, True]).view(3, 2) - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + x = x.clone() + if rank == 0: + x[mask] = 0.0 + if rank == 1: + x[mask] = torch.tensor([1.0]) + return x + + self.run_compare_torch( + (3, 2), + IndexPutModel(), + frontend=frontend, + backend=backend, + compute_unit=compute_unit, + minimum_deployment_target=minimum_deployment_target, + ) + + @pytest.mark.parametrize( + "compute_unit, backend, frontend, rank, minimum_deployment_target", + itertools.product( + compute_units, + backends, + frontends, + [0, 1], + [None, ct.target.iOS17], + ), + ) + def test_index_put_bool_index_all_false( + self, compute_unit, backend, frontend, rank, minimum_deployment_target + ): + class IndexPutModel(torch.nn.Module): + def forward(self, x): + mask = torch.tensor([False, False, False, False, False, False]).view(3, 2) + if 
frontend in TORCH_EXPORT_BASED_FRONTENDS: x = x.clone() if rank == 0: x[mask] = 0.0 @@ -8310,7 +8847,7 @@ def forward(self, x): frontend=frontend, backend=backend, compute_unit=compute_unit, - minimum_deployment_target=minimum_deployment_target + minimum_deployment_target=minimum_deployment_target, ) @pytest.mark.parametrize( @@ -8322,8 +8859,13 @@ def forward(self, x): [None, ct.target.iOS17], ), ) - def test_index_put_dynamic_bool_index(self, compute_unit, backend, frontend, minimum_deployment_target): - if backend[0] == "neuralnetwork" and frontend == TorchFrontend.EXIR: + def test_index_put_dynamic_bool_index( + self, compute_unit, backend, frontend, minimum_deployment_target + ): + if backend[0] == "neuralnetwork" and frontend in ( + TorchFrontend.TORCHEXPORT, + TorchFrontend.EXECUTORCH, + ): pytest.xfail( "https://github.com/apple/coremltools/issues/2185: " "EXIR IndexPut Fails on NeuralNetwork Backend" @@ -8334,7 +8876,7 @@ def test_index_put_dynamic_bool_index(self, compute_unit, backend, frontend, min class IndexPutModel(torch.nn.Module): def forward(self, x, y): mask = y > 1 - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: x = x.clone() x[y > 1] = 0.0 return x @@ -8367,7 +8909,10 @@ def forward(self, x, y): def test_index_put_int_index_case_1( self, compute_unit, backend, frontend, rank, accumulate, minimum_deployment_target ): - if backend[0] == "neuralnetwork" and frontend == TorchFrontend.EXIR: + if backend[0] == "neuralnetwork" and frontend in ( + TorchFrontend.TORCHEXPORT, + TorchFrontend.EXECUTORCH, + ): pytest.xfail( "https://github.com/apple/coremltools/issues/2185: " "EXIR IndexPut Fails on NeuralNetwork Backend" @@ -8375,7 +8920,7 @@ def test_index_put_int_index_case_1( class IndexPutModel(torch.nn.Module): def forward(self, x, indices, values): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: x = x.clone() x.index_put_(tuple(indices.t()), values, accumulate=accumulate) return x @@ -8550,15 +9095,15 @@ def forward(self, x, position, val): def test_index_put_negative_indices_case_1( self, compute_unit, backend, frontend, accumulate, minimum_deployment_target ): - if backend[0] == "neuralnetwork" and frontend == TorchFrontend.EXIR: - pytest.xfail( - "https://github.com/apple/coremltools/issues/2185: " - "EXIR IndexPut Fails on NeuralNetwork Backend" + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip( + "https://github.com/pytorch/pytorch/issues/134443 " + "Torch exported program outputs fake tensor" ) class IndexPutModel(torch.nn.Module): def forward(self, x): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: x = x.clone() x.index_put_( indices=(torch.LongTensor([0, -1]), torch.LongTensor([-2, 1])), @@ -8590,7 +9135,10 @@ def forward(self, x): def test_index_put_negative_indices_case_2( self, compute_unit, backend, frontend, rank, accumulate, minimum_deployment_target ): - if backend[0] == "neuralnetwork" and frontend == TorchFrontend.EXIR: + if backend[0] == "neuralnetwork" and frontend in ( + TorchFrontend.TORCHEXPORT, + TorchFrontend.EXECUTORCH, + ): pytest.xfail( "https://github.com/apple/coremltools/issues/2185: " "EXIR IndexPut Fails on NeuralNetwork Backend" @@ -8606,7 +9154,7 @@ def test_index_put_negative_indices_case_2( class IndexPutModel(torch.nn.Module): def forward(self, x, indices, values): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: x = x.clone() x.index_put_(tuple(indices.t()), values, 
accumulate=accumulate) return x @@ -8660,7 +9208,7 @@ class TestIndex(TorchBaseTest): def test_index_bool_indices( self, compute_unit, backend, frontend, input_dtype, shape, minimum_deployment_target ): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2183: " "Operator torch._ops.aten._assert_async.msg is not Aten Canonical" @@ -8721,7 +9269,7 @@ def forward(self, x, y): def test_index_int_index_case_1( self, compute_unit, backend, frontend, input_dtype, shape, minimum_deployment_target ): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2184: " "Cannot Convert Empty EXIR Model" @@ -9057,7 +9605,7 @@ def forward(self, x): def test_index_int_index_case_9( self, compute_unit, backend, frontend, input_dtype, shape, minimum_deployment_target ): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2183: " "Operator torch._ops.aten._assert_async.msg is not Aten Canonical" @@ -9101,7 +9649,7 @@ def forward(self, x): def test_index_int_index_case_10( self, compute_unit, backend, frontend, input_dtype, shape, minimum_deployment_target ): - if frontend == TorchFrontend.EXIR: + if frontend in TORCH_EXPORT_BASED_FRONTENDS: pytest.xfail( "https://github.com/apple/coremltools/issues/2183: " "Operator torch._ops.aten._assert_async.msg is not Aten Canonical" @@ -9289,12 +9837,11 @@ def test_index_select_invalid_indices(self): class TestLoss(TorchBaseTest): @pytest.mark.parametrize( "compute_unit, backend, rank, reduction", - itertools.product( - compute_units, backends, range(1, 4), ["none", "mean", "sum"] - ), + itertools.product(compute_units, backends, range(1, 4), ["none", "mean", "sum"]), ) def test_mse_loss(self, compute_unit, backend, rank: int, reduction: str): input_shape = tuple(np.random.randint(low=1, high=5, size=rank)) + class Model(torch.nn.Module): def __init__(self): super().__init__() @@ -9305,19 +9852,23 @@ def forward(self, x, y): input_shapes = [input_shape, input_shape] - self.run_compare_torch( - input_shapes, Model(), backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(input_shapes, Model(), backend=backend, compute_unit=compute_unit) class TestPad(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, rank, mode", + "compute_unit, backend, frontend, rank, mode", itertools.product( - compute_units, backends, range(3, 5), ["reflect", "replicate"] + compute_units, backends, frontends, range(3, 5), ["reflect", "replicate"] ), ) - def test_pad_reflect_replicate(self, compute_unit, backend, rank: int, mode: str): + def test_pad_reflect_replicate(self, compute_unit, backend, frontend, rank: int, mode: str): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip( + "torch._dynamo.exc.UserError: Tried to use data-dependent value " + "in the subsequent computation" + ) + if rank == 3: pad_len = 2 input_shape = (5, 10, 10) @@ -9330,18 +9881,22 @@ def test_pad_reflect_replicate(self, compute_unit, backend, rank: int, mode: str ) max_pad = min(input_shape[-1], input_shape[-2]) pad = list(np.random.randint(low=0, high=max_pad, size=pad_len)) - model = ModuleWrapper( - function=torch.nn.functional.pad, kwargs={"pad": pad, "mode": mode} - ) + model = ModuleWrapper(function=torch.nn.functional.pad, kwargs={"pad": pad, "mode": mode}) self.run_compare_torch( - input_shape, model, 
backend=backend, compute_unit=compute_unit + input_shape, model, backend=backend, compute_unit=compute_unit, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, rank", - itertools.product(compute_units, backends, range(1, 6)), + "compute_unit, backend, frontend, rank", + itertools.product(compute_units, backends, frontends, range(1, 6)), ) - def test_pad_constant(self, compute_unit, backend, rank: int): + def test_pad_constant(self, compute_unit, backend, frontend, rank: int): + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + pytest.skip( + "torch._dynamo.exc.UserError: Tried to use data-dependent value in the subsequent " + "computation" + ) + if rank > 5: raise NotImplementedError("Only supports < 6D constant padding") val = float(np.random.random(1)) @@ -9353,63 +9908,71 @@ def test_pad_constant(self, compute_unit, backend, rank: int): kwargs={"pad": pad, "mode": "constant", "value": val}, ) self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, + model, + backend=backend, + compute_unit=compute_unit, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_constant_pad_1d(self, compute_unit, backend): + def test_constant_pad_1d(self, compute_unit, backend, frontend): input_shape = (3, 4, 5) model = torch.nn.ConstantPad1d((5, 6), 3.5).eval() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, backend=backend, compute_unit=compute_unit, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_constant_pad_2d(self, compute_unit, backend): + def test_constant_pad_2d(self, compute_unit, backend, frontend): input_shape = (3, 4, 5, 6) model = torch.nn.ConstantPad2d((5, 6, 3, 8), 3.5).eval() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, backend=backend, compute_unit=compute_unit, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_constant_pad_3d(self, compute_unit, backend): + def test_constant_pad_3d(self, compute_unit, backend, frontend): input_shape = (3, 4, 5, 6, 2) model = torch.nn.ConstantPad3d((5, 6, 3, 8, 2, 4), 3.5).eval() self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, model, backend=backend, compute_unit=compute_unit, frontend=frontend ) class TestMaskedFill(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, dtype, value", + "compute_unit, backend, frontend, dtype, value", itertools.product( compute_units, backends, + frontends, [np.int32, np.float32], [10.3, 7, 0], ), ) - def test_masked_fill(self, compute_unit, backend, dtype, value): + def test_masked_fill(self, compute_unit, backend, frontend, dtype, value): SHAPE = (2, 3) MASK = torch.bernoulli(torch.rand(SHAPE[-1])).to(torch.bool) @@ -9418,9 +9981,10 @@ def test_masked_fill(self, compute_unit, backend, dtype, value): model = ModuleWrapper(torch.masked_fill, {"mask": MASK, "value": value}) converter_input_type = [TensorType(shape=SHAPE, dtype=dtype)] - TorchBaseTest.run_compare_torch( + self.run_compare_torch( input_data, model, + frontend=frontend, backend=backend, 
compute_unit=compute_unit, input_as_shape=False, @@ -9430,10 +9994,11 @@ def test_masked_fill(self, compute_unit, backend, dtype, value): class TestMeshgrid(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, x, y, z, dtype, inp_mode, indexing", + "compute_unit, backend, frontend, x, y, z, dtype, inp_mode, indexing", itertools.product( compute_units, backends, + frontends, [1, 2], [3, 4], [5, 6], @@ -9446,6 +10011,7 @@ def test_meshgrid( self, compute_unit, backend, + frontend, x, y, z, @@ -9474,6 +10040,7 @@ def forward(self, x, y, z): model, expected_results, input_as_shape=False, + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -9486,17 +10053,15 @@ class TestAddmm(TorchBaseTest): compute_units, backends, ((2, 2, 2), (4, 5, 9)), - (1., 2.), - (1., 3.), - ) + (1.0, 2.0), + (1.0, 3.0), + ), ) def test_addmm(self, compute_unit, backend, shapes, beta, alpha): - class TestModel(nn.Module): def forward(self, x): return torch.addmm(x, m1, m2, beta=beta, alpha=alpha) - m, n, p = shapes # m1 @ m2 must be legal @@ -9506,7 +10071,10 @@ def forward(self, x): x_shape = (m, p) self.run_compare_torch( - x_shape, TestModel(), backend=backend, compute_unit=compute_unit, + x_shape, + TestModel(), + backend=backend, + compute_unit=compute_unit, ) @@ -9539,7 +10107,10 @@ def forward(self, x): for dim in dims: m = TestModel(dim, shapes) self.run_compare_torch( - shapes, m, backend=backend, compute_unit=compute_unit, + shapes, + m, + backend=backend, + compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, ) @@ -9556,7 +10127,9 @@ def forward(self, x): [None, ct.target.iOS17], ), ) - def test_scatter_with_scalar_source(self, compute_unit, backend, shapes_dims, minimum_deployment_target): + def test_scatter_with_scalar_source( + self, compute_unit, backend, shapes_dims, minimum_deployment_target + ): class TestModel(nn.Module): def __init__(self, dim, shapes): super(TestModel, self).__init__() @@ -9571,7 +10144,10 @@ def forward(self, x): for dim in dims: m = TestModel(dim, shapes) self.run_compare_torch( - shapes, m, backend=backend, compute_unit=compute_unit, + shapes, + m, + backend=backend, + compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, ) @@ -9605,7 +10181,10 @@ def forward(self, x): for dim in dims: m = TestModel(dim, shapes, mode) self.run_compare_torch( - shapes, m, backend=backend, compute_unit=compute_unit, + shapes, + m, + backend=backend, + compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, ) @@ -9685,27 +10264,29 @@ def forward(self, x): class TestBroadcastTensors(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [(1,), (1, 2)], ), ) - def test_one_tensor(self, compute_unit, backend, shapes): + def test_one_tensor(self, compute_unit, backend, frontend, shapes): class TestModel(nn.Module): def forward(self, a): return torch.broadcast_tensors(a) self.run_compare_torch( - shapes, TestModel().eval(), backend=backend, compute_unit=compute_unit + shapes, TestModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [ [(2, 1), (1, 3)], [(5, 1, 4, 1), (3, 1, 1)], @@ -9714,20 +10295,21 @@ def forward(self, a): ], ), ) - def test_two_tensors(self, compute_unit, backend, shapes): + 
def test_two_tensors(self, compute_unit, backend, frontend, shapes): class TestModel(nn.Module): def forward(self, a, b): return torch.broadcast_tensors(a, b) self.run_compare_torch( - shapes, TestModel().eval(), backend=backend, compute_unit=compute_unit + shapes, TestModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [ [(2, 1), (1, 3), (1,), (1, 1)], [(5, 1, 4, 1), (3, 1, 1), (1,), (4, 8)], @@ -9735,13 +10317,13 @@ def forward(self, a, b): ], ), ) - def test_four_tensors(self, compute_unit, backend, shapes): + def test_four_tensors(self, compute_unit, backend, frontend, shapes): class TestModel(nn.Module): def forward(self, a, b, c, d): return torch.broadcast_tensors(a, b, c, d) self.run_compare_torch( - shapes, TestModel().eval(), backend=backend, compute_unit=compute_unit + shapes, TestModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @@ -9800,17 +10382,15 @@ def test_embedding_invalid_indices(self): class TestDuplicateOutputTensors(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_dtype", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, - [np.int32, np.float32], + frontends, ), ) # Test case for rdar://100138064 (Duplicate output tensors trigger ops removal errors). - def test_duplicate_output_not_raise_errors( - self, compute_unit, backend, input_dtype - ): + def test_duplicate_output_not_raise_errors(self, compute_unit, backend, frontend): if backend[0] == "neuralnetwork": pytest.skip( "rdar://100243127 ([PyTorch] Duplicate Output Tensor Doesn't work for neuralnetwork)" @@ -9829,6 +10409,7 @@ def forward(self, x): model, expected_results=expected_results, input_as_shape=False, + frontend=frontend, backend=backend, compute_unit=compute_unit, converter_input_type=converter_input_type, @@ -9864,9 +10445,7 @@ def forward(self, x): model = BaddbmmModel() # Makes it broadcastable to (B, N, P). for input_shape in [(1, N, P), (B, 1, P), (1, P)]: - self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(input_shape, model, backend=backend, compute_unit=compute_unit) class TestGlu(TorchBaseTest): @@ -9883,17 +10462,16 @@ def test_glu(self, compute_unit, backend, shapes): glu_dim_list = [-1] + [i for i in range(len(shapes))] for glu_dim in glu_dim_list: model = torch.nn.GLU(glu_dim) - self.run_compare_torch( - shapes, model, backend=backend, compute_unit=compute_unit - ) + self.run_compare_torch(shapes, model, backend=backend, compute_unit=compute_unit) class TestHstack(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [ [(2, 4, 6), (2, 4, 6)], [(1, 4, 5), (1, 2, 5)], @@ -9901,24 +10479,25 @@ class TestHstack(TorchBaseTest): ], # Test 1-D tensors. 
), ) - def test_hstack(self, compute_unit, backend, shapes): + def test_hstack(self, compute_unit, backend, frontend, shapes): class HstackModel(nn.Module): def forward(self, *tensors): return torch.hstack(tensors) self.run_compare_torch( - shapes, HstackModel(), backend=backend, compute_unit=compute_unit + shapes, HstackModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [[(2, 4, 6), (2, 4, 6)]], ), ) - def test_hstack_with_parameter_out(self, compute_unit, backend, shapes): + def test_hstack_with_parameter_out(self, compute_unit, backend, frontend, shapes): class HstackModel(nn.Module): def forward(self, *tensors): output_tensor = torch.tensor([]) @@ -9926,16 +10505,17 @@ def forward(self, *tensors): return output_tensor self.run_compare_torch( - shapes, HstackModel(), backend=backend, compute_unit=compute_unit + shapes, HstackModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestRemainder(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [ [(2, 4, 6), (2, 4, 6)], [(2, 4, 6), (4, 6)], # broadcastable tensors @@ -9943,24 +10523,25 @@ class TestRemainder(TorchBaseTest): ], ), ) - def test_remainder(self, compute_unit, backend, shapes): + def test_remainder(self, compute_unit, backend, frontend, shapes): class RemainderModel(nn.Module): def forward(self, dividend, divisor): return torch.remainder(dividend, divisor) self.run_compare_torch( - shapes, RemainderModel(), backend=backend, compute_unit=compute_unit + shapes, RemainderModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend, shapes", + "compute_unit, backend, frontend, shapes", itertools.product( compute_units, backends, + frontends, [[(2, 4, 6), (2, 4, 6)]], ), ) - def test_remainder_with_parameter_out(self, compute_unit, backend, shapes): + def test_remainder_with_parameter_out(self, compute_unit, backend, frontend, shapes): class RemainderModel(nn.Module): def forward(self, dividend, divisor): output_tensor = torch.tensor([]) @@ -9968,17 +10549,18 @@ def forward(self, dividend, divisor): return output_tensor self.run_compare_torch( - shapes, RemainderModel(), backend=backend, compute_unit=compute_unit + shapes, RemainderModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_remainder_input_types_promotion(self, compute_unit, backend): + def test_remainder_input_types_promotion(self, compute_unit, backend, frontend): class RemainderModel(nn.Module): def forward(self, dividend, divisor): return torch.remainder(dividend, divisor) @@ -9988,6 +10570,7 @@ def forward(self, dividend, divisor): self.run_compare_torch( [input_dividend, input_divisor], RemainderModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, @@ -9997,9 +10580,7 @@ def forward(self, dividend, divisor): class TestSum(TorchBaseTest): @pytest.mark.parametrize( "compute_unit, backend, input_dtype", - itertools.product( - compute_units, backends, [torch.int32, torch.float32, torch.bool] - ), + itertools.product(compute_units, backends, [torch.int32, 
torch.float32, torch.bool]), ) def test_sum(self, compute_unit, backend, input_dtype): model = ModuleWrapper(function=torch.sum) @@ -10119,19 +10700,15 @@ def test_roll(self, compute_unit, backend, shape, shifts): # Negative shifts [[-9, -1], [1, 2]], # Duplicate dims - [[8, 10, -8], [0, 1, 0]] + [[8, 10, -8], [0, 1, 0]], ], ), ) def test_roll_with_dims(self, compute_unit, backend, shape, shifts_dims): shifts, dims = shifts_dims model = ModuleWrapper(torch.roll, kwargs={"shifts": shifts, "dims": dims}) - self.run_compare_torch( - shape, - model, - backend=backend, - compute_unit=compute_unit - ) + self.run_compare_torch(shape, model, backend=backend, compute_unit=compute_unit) + class TestArgmax(TorchBaseTest): @pytest.mark.parametrize( @@ -10152,14 +10729,8 @@ def test_argmax( axis: int, input_dtype: np.dtype, ): - input_data = ( - torch.rand(*shape) - if input_dtype == np.float32 - else torch.randint(10, shape) - ) - converter_input_type = [ - ct.TensorType(shape=input_data.shape, dtype=input_dtype) - ] + input_data = torch.rand(*shape) if input_dtype == np.float32 else torch.randint(10, shape) + converter_input_type = [ct.TensorType(shape=input_data.shape, dtype=input_dtype)] model = ModuleWrapper(function=torch.argmax, kwargs={"dim": axis}) expected_results = model(input_data) TorchBaseTest.run_compare_torch( @@ -10205,13 +10776,17 @@ def forward(self, *inputs): class TestComplex(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_complex(self, compute_unit: ct.ComputeUnit, backend): + def test_complex(self, compute_unit: ct.ComputeUnit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class ComplexModel(torch.nn.Module): def forward(self, x): real_part = x + 1 @@ -10220,33 +10795,46 @@ def forward(self, x): return torch.stack([complex_data.real, complex_data.imag], dim=1) TorchBaseTest.run_compare_torch( - (2, 3, 4), ComplexModel(), backend=backend, compute_unit=compute_unit + (2, 3, 4), + ComplexModel(), + compute_unit=compute_unit, + backend=backend, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_complex_real_imag_same_input(self, compute_unit: ct.ComputeUnit, backend): + def test_complex_real_imag_same_input(self, compute_unit: ct.ComputeUnit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class ComplexModel(torch.nn.Module): def forward(self, x): return torch.complex(x, x).real TorchBaseTest.run_compare_torch( - (2, 3, 4), ComplexModel(), backend=backend, compute_unit=compute_unit + (2, 3, 4), + ComplexModel(), + compute_unit=compute_unit, + backend=backend, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_complex_input_error(self, compute_unit: ct.ComputeUnit, backend): + def test_complex_input_error(self, compute_unit: ct.ComputeUnit, backend, frontend): class ComplexModel(torch.nn.Module): def forward(self, x): return torch.complex(x.real, x.imag) @@ -10256,45 +10844,54 @@ def forward(self, x): TypeError, match="dtype= is unsupported for inputs/outputs of the model", ): - converter_input_type = [ - 
ct.TensorType(shape=input_data.shape, dtype=np.complex64) - ] + converter_input_type = [ct.TensorType(shape=input_data.shape, dtype=np.complex64)] TorchBaseTest.run_compare_torch( input_data, ComplexModel(), - backend=backend, - compute_unit=compute_unit, input_as_shape=False, converter_input_type=converter_input_type, + compute_unit=compute_unit, + backend=backend, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_complex_output_error(self, compute_unit: ct.ComputeUnit, backend): + def test_complex_output_error(self, compute_unit: ct.ComputeUnit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class ComplexModel(torch.nn.Module): def forward(self, x): return torch.complex(x, x) - with pytest.raises( - ValueError, match="MIL doesn't support complex data as model's output" - ): + with pytest.raises(ValueError, match="MIL doesn't support complex data as model's output"): TorchBaseTest.run_compare_torch( - (2, 3, 4), ComplexModel(), backend=backend, compute_unit=compute_unit + (2, 3, 4), + ComplexModel(), + compute_unit=compute_unit, + backend=backend, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, - ) + frontends, + ), ) - def test_abs(self, compute_unit, backend): + def test_abs(self, compute_unit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class AbsModel(torch.nn.Module): def forward(self, x): x = torch.complex(x, x) @@ -10303,6 +10900,7 @@ def forward(self, x): TorchBaseTest.run_compare_torch( (1, 16), AbsModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -10310,72 +10908,92 @@ def forward(self, x): class TestReal(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_real_real_input(self, compute_unit: ct.ComputeUnit, backend): + def test_real_real_input(self, compute_unit: ct.ComputeUnit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class RealModel(torch.nn.Module): def forward(self, x): return torch.real(x) TorchBaseTest.run_compare_torch( - (2, 3, 4), RealModel(), backend=backend, compute_unit=compute_unit + (2, 3, 4), RealModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_real_complex_input(self, compute_unit: ct.ComputeUnit, backend): + def test_real_complex_input(self, compute_unit: ct.ComputeUnit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class RealModel(torch.nn.Module): def forward(self, x): return torch.real(torch.complex(x, x)) TorchBaseTest.run_compare_torch( - (2, 3, 4), RealModel(), backend=backend, compute_unit=compute_unit + (2, 3, 4), RealModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestImag(TorchBaseTest): # torch.imag only support complex input, so we don't need to test real number input. 
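Editorial aside on the complex-number tests above: they all enforce the same boundary rule, namely that complex tensors may exist inside the converted graph but must not cross the model's input/output boundary. A tiny illustrative module (assumed, not taken from the patch) showing the pattern:

# Illustrative sketch, not part of the patch.
import torch

class ComplexMagnitude(torch.nn.Module):
    def forward(self, x):
        z = torch.complex(x, 2 * x)   # complex math stays internal to the graph
        return torch.abs(z)           # only a real tensor leaves the model

# Returning `z` directly would hit the "MIL doesn't support complex data as
# model's output" error exercised by test_complex_output_error.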
@pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_imag_complex_input(self, compute_unit: ct.ComputeUnit, backend): + def test_imag_complex_input(self, compute_unit: ct.ComputeUnit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class ImagModel(torch.nn.Module): def forward(self, x): return torch.imag(torch.complex(x, x)) TorchBaseTest.run_compare_torch( - (2, 3, 4), ImagModel(), backend=backend, compute_unit=compute_unit + (2, 3, 4), ImagModel(), compute_unit=compute_unit, backend=backend, frontend=frontend ) class TestViewAsReal(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_view_as_real(self, compute_unit: ct.ComputeUnit, backend): + def test_view_as_real(self, compute_unit: ct.ComputeUnit, backend, frontend): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten.complex.default is not Aten Canonical") + class RealModel(torch.nn.Module): def forward(self, x): return torch.view_as_real(torch.complex(x, 2 * x)) TorchBaseTest.run_compare_torch( - (2, 3, 4), RealModel(), backend=backend, compute_unit=compute_unit + (2, 3, 4), + RealModel(), + compute_unit=compute_unit, + backend=backend, + frontend=frontend, ) @@ -10387,16 +11005,12 @@ class TestFft(TorchBaseTest): backends, ), ) - def test_directly_use_fft_complex_output_error( - self, compute_unit: ct.ComputeUnit, backend - ): + def test_directly_use_fft_complex_output_error(self, compute_unit: ct.ComputeUnit, backend): class FftModel(torch.nn.Module): def forward(self, x): return torch.fft.fft(x) - with pytest.raises( - ValueError, match="MIL doesn't support complex data as model's output" - ): + with pytest.raises(ValueError, match="MIL doesn't support complex data as model's output"): TorchBaseTest.run_compare_torch( (2, 3, 4), FftModel(), backend=backend, compute_unit=compute_unit ) @@ -10446,9 +11060,7 @@ def forward(self, x): [None, "forward", "backward", "ortho"], ), ) - def test_fft_basic( - self, compute_unit: ct.ComputeUnit, backend, fft_variant, n, dim, norm - ): + def test_fft_basic(self, compute_unit: ct.ComputeUnit, backend, fft_variant, n, dim, norm): class FftModel(torch.nn.Module): def forward(self, x): if fft_variant == "fft": @@ -10738,12 +11350,17 @@ class NmsModel(torch.nn.Module): def forward(self, boxes, scores): return torchvision.ops.nms(boxes, scores, iou_threshold=0.2) - input_boxes = torch.tensor([[3., 2., 3., 0.], - [0., 0., 2., 2.], - [1., 3., 2., 1.], - [0., 2., 1., 3.], - [1., 1., 2., 3.]], dtype=torch.float32) - input_scores = torch.tensor([3., 2., 0., 1., 4.], dtype=torch.float32) + input_boxes = torch.tensor( + [ + [3.0, 2.0, 3.0, 0.0], + [0.0, 0.0, 2.0, 2.0], + [1.0, 3.0, 2.0, 1.0], + [0.0, 2.0, 1.0, 3.0], + [1.0, 1.0, 2.0, 3.0], + ], + dtype=torch.float32, + ) + input_scores = torch.tensor([3.0, 2.0, 0.0, 1.0, 4.0], dtype=torch.float32) converter_input_type = [ ct.TensorType(shape=input_boxes.shape), ct.TensorType(shape=input_scores.shape), @@ -10826,43 +11443,63 @@ def forward(self, boxes, scores): class TestTensorSize(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, - ) + frontends, + ), ) - def test_tensor_size(self, compute_unit: 
ct.ComputeUnit.CPU_ONLY, backend: List[Tuple[str]]): + def test_tensor_size( + self, compute_unit: ct.ComputeUnit.CPU_ONLY, backend: List[Tuple[str]], frontend + ): class TestModel(torch.nn.Module): def forward(self, x): - return x.size() + # torch.export cannot deal with + # * non-tensor output (because torch.export will try to call .detach) + # * empty graph (i.e. no tensor operation) + # so we use an op to wrap the output into a tensor + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + return torch.tensor(x.size()) + else: + return x.size() self.run_compare_torch( [(1, 2, 3)], TestModel(), backend=backend, compute_unit=compute_unit, + frontend=frontend, ) @pytest.mark.parametrize( - "compute_unit, backend, dim, minimum_deployment_target", + "compute_unit, backend, frontend, dim, minimum_deployment_target", itertools.product( compute_units, - [('mlprogram', "fp16")], + [("mlprogram", "fp16")], + frontends, [2, -1], [None, ct.target.iOS17], - ) + ), ) def test_tensor_size_with_dim( self, compute_unit: ct.ComputeUnit.CPU_ONLY, backend: List[Tuple[str]], + frontend, dim: int, minimum_deployment_target: ct.target, ): class TestModel(torch.nn.Module): def forward(self, x): - return x.size(dim=dim) + # torch.export cannot deal with + # * non-tensor output (because torch.export will try to call .detach) + # * empty graph (i.e. no tensor operation) + # so we use an op to wrap the output into a tensor + if frontend in TORCH_EXPORT_BASED_FRONTENDS: + return torch.tensor(x.size(dim=dim)) + else: + return x.size(dim=dim) self.run_compare_torch( [(1, 2, 3)], @@ -10870,19 +11507,24 @@ def forward(self, x): backend=backend, compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, + frontend=frontend, ) class TestBitwiseAnd(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) def test_bitwise_and( - self, compute_unit: ct.ComputeUnit.CPU_ONLY, backend: List[Tuple[str]] + self, + compute_unit: ct.ComputeUnit.CPU_ONLY, + backend: List[Tuple[str]], + frontend: TorchFrontend, ): class TestModel(torch.nn.Module): def forward(self, x, y): @@ -10894,32 +11536,33 @@ def forward(self, x, y): self.run_compare_torch( [input_data_x, input_data_y], TestModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, ) @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) def test_bitwise_and_unsupport_input( - self, compute_unit: ct.ComputeUnit.CPU_ONLY, backend: List[Tuple[str]] + self, + compute_unit: ct.ComputeUnit.CPU_ONLY, + backend: List[Tuple[str]], + frontend: TorchFrontend, ): class TestModel(torch.nn.Module): def forward(self, x, y): return torch.bitwise_and(x, y) input_shape = (2, 3) - input_data_x = torch.randint( - low=0, high=10, size=input_shape, dtype=torch.int32 - ) - input_data_y = torch.randint( - low=0, high=10, size=input_shape, dtype=torch.int32 - ) + input_data_x = torch.randint(low=0, high=10, size=input_shape, dtype=torch.int32) + input_data_y = torch.randint(low=0, high=10, size=input_shape, dtype=torch.int32) with pytest.raises( NotImplementedError, match="The `bitwise_and` op only supports boolean input", ): self.run_compare_torch( [input_data_x, input_data_y], TestModel(), + frontend=frontend, backend=backend, compute_unit=compute_unit, input_as_shape=False, ) -class 
TestLogicalNot(TorchBaseTest): - @pytest.mark.parametrize( - "compute_unit, backend, input_dtype", - itertools.product( - compute_units, - backends, - [torch.int32, torch.float32, torch.bool], - ), - ) - def test_logical_not(self, compute_unit, backend, input_dtype): - class TestModel(torch.nn.Module): - def forward(self, x): - return torch.logical_not(x) - - input_data = torch.randint( - low=0, high=2 if input_dtype == torch.bool else 4, size=(2, 3, 4), dtype=input_dtype - ) - self.run_compare_torch( - input_data, - TestModel(), - backend=backend, - compute_unit=compute_unit, - input_as_shape=False, - ) - - @pytest.mark.parametrize( - "compute_unit, backend, input_dtype, output_dtype", - itertools.product( - compute_units, - backends, - [torch.int32, torch.float32, torch.bool], - [torch.int16, torch.float16, torch.bool], - ), - ) - def test_logical_not_with_out(self, compute_unit, backend, input_dtype, output_dtype): - class TestModel(torch.nn.Module): - def forward(self, x): - out_tensor = torch.empty((2, 3, 4), dtype=output_dtype) - torch.logical_not(x, out=out_tensor) - return out_tensor - - input_data = torch.randint( - low=0, high=2 if input_dtype == torch.bool else 4, size=(2, 3, 4), dtype=input_dtype - ) - self.run_compare_torch( - input_data, - TestModel(), - backend=backend, - compute_unit=compute_unit, - input_as_shape=False, - ) - - class TestUnfold(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_shape, kernel_size, padding, stride", + "compute_unit, backend, frontend, input_shape, kernel_size, padding, stride", itertools.product( compute_units, backends, + frontends, [(1, 1, 10, 11), (5, 3, 12, 13)], [(2, 3)], [0, 1, 8, (1, 3), (2, 6), (0, 5)], [1, 2, 7, (2, 3), (5, 4)], ), ) - def test_unfold(self, compute_unit, backend, input_shape, kernel_size, padding, stride): + def test_unfold( + self, compute_unit, backend, frontend, input_shape, kernel_size, padding, stride + ): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("ExecuTorch produces rank > 5 tensor") + self.run_compare_torch( input_shape, ModuleWrapper( @@ -11007,8 +11604,9 @@ def test_unfold(self, compute_unit, backend, input_shape, kernel_size, padding, "kernel_size": kernel_size, "padding": padding, "stride": stride, - } + }, ), + frontend=frontend, backend=backend, compute_unit=compute_unit, ) @@ -11035,24 +11633,28 @@ def construct_block_count( block_count = 1 for i in range(dim): block_count *= np.floor( - (output_size[i] + 2 * padding[i] - dilation[i] * (kernel_size[i] - 1) - 1) / stride[i] + (output_size[i] + 2 * padding[i] - dilation[i] * (kernel_size[i] - 1) - 1) + / stride[i] + 1 ).astype(np.int32) return block_count - @pytest.mark.parametrize( - "compute_unit, backend, N, C, output_size, kernel_size", + "compute_unit, backend, frontend, N, C, output_size, kernel_size", itertools.product( compute_units, backends, + frontends, [1, 2], [1, 3], [(12, 12), (12, 24)], [(2, 2), (2, 3)], ), ) - def test_unfold(self, compute_unit, backend, N, C, output_size, kernel_size): + def test_unfold(self, compute_unit, backend, frontend, N, C, output_size, kernel_size): + if frontend == TorchFrontend.EXECUTORCH: + pytest.skip("torch._ops.aten._unsafe_index_put.default is not Aten Canonical") + block_count = self.construct_block_count( output_size, kernel_size, @@ -11066,8 +11668,9 @@ def test_unfold(self, compute_unit, backend, N, C, output_size, kernel_size): "output_size": output_size, "kernel_size": kernel_size, "stride": kernel_size, - } + }, ), + frontend=frontend, backend=backend, 
compute_unit=compute_unit, ) @@ -11075,13 +11678,14 @@ def test_unfold(self, compute_unit, backend, N, C, output_size, kernel_size): class TestTupleUnpack(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend", + "compute_unit, backend, frontend", itertools.product( compute_units, backends, + frontends, ), ) - def test_tuple_unpack(self, compute_unit, backend): + def test_tuple_unpack(self, compute_unit, backend, frontend): class ReturnTupleModel(nn.Module): def forward(self, x): return x * 3, x * 4, x * 5 @@ -11095,17 +11699,22 @@ def forward(self, x): out1, out2, out3 = self.return_tuple_layer(x) return out1.relu(), out2.sigmoid(), out3.softmax(1) - self.run_compare_torch((1, 2, 3), TestModel(), backend=backend, compute_unit=compute_unit) + self.run_compare_torch( + (1, 2, 3), TestModel(), compute_unit=compute_unit, backend=backend, frontend=frontend + ) class TestTupleIndex(TorchBaseTest): @pytest.mark.parametrize( "compute_unit, backend", - itertools.product(compute_units, backends,), + itertools.product( + compute_units, + backends, + ), ) def test_tuple_index(self, compute_unit, backend): class InnerModel(nn.Module): - def forward(self,x): + def forward(self, x): return (torch.tensor([0]), torch.tensor([1])) class OuterModel(nn.Module): @@ -11118,11 +11727,20 @@ def forward(self, x): return inner[0] x = torch.rand(1, 3, 640, 640) - self.run_compare_torch(x, OuterModel(), - input_as_shape=False, use_scripting=True, - backend=backend, compute_unit=compute_unit) + self.run_compare_torch( + x, + OuterModel(), + input_as_shape=False, + use_scripting=True, + backend=backend, + compute_unit=compute_unit, + ) +@pytest.mark.skipif( + platform.machine() == "x86_64", + reason="The x86_64 has outdated PyTorch, which doesn't have _scaled_dot_product_flash_attention in fx node.", +) class TestScaledDotProductAttention(TorchBaseTest): """ Tests for torch.nn.functional.scaled_dot_product_attention op @@ -11169,12 +11787,15 @@ def test_different_batch_dims(self, compute_unit, backend, frontend, minimum_dep minimum_deployment_target=minimum_deployment_target, ) - # Only iOS 18 with torch script can have mb.sdpa, because - # 1. mb.sdpa is introduced in iOS 18, so before iOS 18 we would decompose sdpa - # 2. 
torch.sdpa is not a core aten op, so EXIR would decompose sdpa - if minimum_deployment_target == ct.target.iOS18 and frontend == TorchFrontend.TORCHSCRIPT: - if backend == ("mlprogram", "fp16"): - assert get_op_types_in_program(res[1]._mil_program) == [ + # mb.sdpa is introduced in iOS 18, so before iOS 18 we would decompose sdpa + # torch.sdpa is not a core aten op, so executorch would decompose sdpa + if ( + backend[0] == "mlprogram" + and minimum_deployment_target == ct.target.iOS18 + and frontend != TorchFrontend.EXECUTORCH + ): + if backend[1] == "fp16": + expected_ops = [ "cast", "tile", "cast", @@ -11182,6 +11803,9 @@ def test_different_batch_dims(self, compute_unit, backend, frontend, minimum_dep "cast", "scaled_dot_product_attention", ] + else: + expected_ops = ["tile", "tile", "scaled_dot_product_attention"] + assert get_op_types_in_program(res[1]._mil_program) == expected_ops @pytest.mark.parametrize( "compute_unit, backend, frontend, minimum_deployment_target, rank, dynamic", @@ -11208,7 +11832,7 @@ def test_different_input_ranks_no_mask( elif rank == 4: input_shape = (batch_size, n_heads_1, seq_len, d) elif rank == 5: - input_shape = (batch_size, n_heads_1, n_heads_1, seq_len, d) + input_shape = (batch_size, n_heads_1, n_heads_2, seq_len, d) else: raise ValueError("invalid rank") @@ -11241,46 +11865,42 @@ def test_different_input_ranks_no_mask( minimum_deployment_target=minimum_deployment_target, ) - # Only iOS 18 with torch script can have mb.sdpa, because - # 1. mb.sdpa is introduced in iOS 18, so before iOS 18 we would decompose sdpa - # 2. torch.sdpa is not a core aten op, so EXIR would decompose sdpa - if minimum_deployment_target == ct.target.iOS18 and frontend == TorchFrontend.TORCHSCRIPT: - if backend == ("mlprogram", "fp16"): - if rank == 2: - if dynamic: - expected_ops = [ - "expand_dims", - "expand_dims", - "expand_dims", - "scaled_dot_product_attention", - "squeeze", - ] - else: - expected_ops = [ - "cast", - "expand_dims", - "cast", - "expand_dims", - "cast", - "expand_dims", - "scaled_dot_product_attention", - "squeeze", - ] - assert get_op_types_in_program(coreml_model._mil_program) == expected_ops - + # mb.sdpa is introduced in iOS 18, so before iOS 18 we would decompose sdpa + # torch.sdpa is not a core aten op, so executorch would decompose sdpa + if ( + backend[0] == "mlprogram" + and minimum_deployment_target == ct.target.iOS18 + and frontend != TorchFrontend.EXECUTORCH + ): + pymil_inputs = list(coreml_model._mil_program.functions["main"].inputs.values()) + is_io_fp16 = pymil_inputs[0].dtype == types.fp16 + is_io_precision_same_as_compute_precision = is_io_fp16 == (backend[1] == "fp16") + if rank == 2: + if is_io_precision_same_as_compute_precision: + expected_ops = [ + "expand_dims", + "expand_dims", + "expand_dims", + "scaled_dot_product_attention", + "squeeze", + ] else: - if dynamic: - expected_ops = [ - "scaled_dot_product_attention", - ] - else: - expected_ops = [ - "cast", - "cast", - "cast", - "scaled_dot_product_attention", - ] - assert get_op_types_in_program(coreml_model._mil_program) == expected_ops + expected_ops = [ + "cast", + "expand_dims", + "cast", + "expand_dims", + "cast", + "expand_dims", + "scaled_dot_product_attention", + "squeeze", + ] + else: + if is_io_precision_same_as_compute_precision: + expected_ops = ["scaled_dot_product_attention"] + else: + expected_ops = ["cast", "cast", "cast", "scaled_dot_product_attention"] + assert get_op_types_in_program(coreml_model._mil_program) == expected_ops @pytest.mark.parametrize( 
"compute_unit, backend, frontend, minimum_deployment_target, seq_lengths, include_heads, dynamic", @@ -11304,7 +11924,7 @@ def test_is_causal_flag( include_heads, dynamic, ): - if frontend == TorchFrontend.EXIR: + if frontend == TorchFrontend.EXECUTORCH: pytest.xfail( "https://github.com/apple/coremltools/issues/2199: placeholder assertion error" ) @@ -11369,7 +11989,7 @@ def test_attn_mask( bool_mask, dynamic, ): - if frontend == TorchFrontend.TORCHSCRIPT and bool_mask: + if frontend != TorchFrontend.EXECUTORCH and bool_mask: pytest.xfail( "rdar://110499660 ([CI][Bug] test_attn_mask is occasionally failing when bool_mask = True)" ) @@ -11413,6 +12033,30 @@ def test_attn_mask( input_as_shape=False, ) + @pytest.mark.parametrize( + "compute_unit, backend, frontend", + itertools.product(compute_units, backends, frontends), + ) + def test_scale(self, compute_unit, backend, frontend): + batch_size, seq_len, n_heads, d = 2, 10, 3, 7 + input_shape = (batch_size, n_heads, seq_len, d) + model = ModuleWrapper( + function=nn.functional.scaled_dot_product_attention, + kwargs={ + "attn_mask": None, + "dropout_p": 0.0, + "is_causal": False, + "scale": 1.5, + }, + ) + self.run_compare_torch( + [input_shape] * 3, + model, + frontend=frontend, + backend=backend, + compute_unit=compute_unit, + ) + @pytest.mark.parametrize( "compute_unit, backend, frontend, minimum_deployment_target, mask_as_input, dynamic", itertools.product( @@ -11433,7 +12077,7 @@ def test_toy_xformer_with_sdpa( mask_as_input, dynamic, ): - if frontend == TorchFrontend.EXIR and not mask_as_input: + if frontend == TorchFrontend.EXECUTORCH and not mask_as_input: pytest.xfail( "https://github.com/apple/coremltools/issues/2199: placeholder assertion error" ) @@ -11558,8 +12202,7 @@ def test_dropout_early_error_out(self): value = generate_input_data(value_shape) model = ModuleWrapper( - function=nn.functional.scaled_dot_product_attention, - kwargs={"dropout_p": 0.0} + function=nn.functional.scaled_dot_product_attention, kwargs={"dropout_p": 0.0} ) self.run_compare_torch( (query, key, value), @@ -11575,8 +12218,7 @@ def test_dropout_early_error_out(self): ), ): model = ModuleWrapper( - function=nn.functional.scaled_dot_product_attention, - kwargs={"dropout_p": 0.1} + function=nn.functional.scaled_dot_product_attention, kwargs={"dropout_p": 0.1} ) self.run_compare_torch( (query, key, value), @@ -11600,7 +12242,9 @@ def __init__(self, input_size, hidden_size, nhead=1, num_layers=1, dropout_rate= dim_feedforward=hidden_size, dropout=dropout_rate, ) - self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers) + self.transformer_encoder = nn.TransformerEncoder( + encoder_layers, num_layers=num_layers + ) def forward(self, x): y = self.transformer_encoder(x) @@ -11651,15 +12295,17 @@ def test_transformer(self, compute_unit, backend, dynamic): class TestFliplr(TorchBaseTest): @pytest.mark.parametrize( - "compute_unit, backend, input_shape", - itertools.product(compute_units, backends, [(2, 3), (3, 4, 5), (8, 2, 6, 4)]), + "compute_unit, backend, frontend, input_shape", + itertools.product(compute_units, backends, frontends, [(2, 3), (3, 4, 5), (8, 2, 6, 4)]), ) - def test_fliplr(self, compute_unit, backend, input_shape): + def test_fliplr(self, compute_unit, backend, frontend, input_shape): class TestModel(nn.Module): def forward(self, x): return torch.fliplr(x) - self.run_compare_torch(input_shape, TestModel(), backend=backend, compute_unit=compute_unit) + self.run_compare_torch( + input_shape, TestModel(), 
compute_unit=compute_unit, backend=backend, frontend=frontend + ) class TestMultinomial(TorchBaseTest): diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py index a600faa06..9d429283e 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py @@ -8,9 +8,11 @@ from typing import Optional import numpy as np +import numpy.testing import pytest import torch import torchvision +from packaging.version import Version import coremltools as ct import coremltools.optimize as cto @@ -18,10 +20,13 @@ from coremltools._deps import ( _HAS_TORCH, _HAS_TORCH_VISION, + _HAS_TORCHAO, MSG_TORCH_NOT_FOUND, MSG_TORCH_VISION_NOT_FOUND, + MSG_TORCHAO_NOT_FOUND, ) from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.frontend.torch.utils import TorchFrontend from coremltools.converters.mil.mil import types from coremltools.converters.mil.testing_utils import get_op_types_in_program from coremltools.optimize.coreml import _quantization_passes @@ -32,7 +37,11 @@ create_unique_weight, ) -from .testing_utils import TorchBaseTest +from .testing_utils import TorchBaseTest, frontends + +if _HAS_TORCHAO: + import torchao + from torchao.quantization import quant_primitives as torchao_quant pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND) @@ -103,6 +112,7 @@ def run_compare_torch( input_as_shape=True, minimum_deployment_target=ct.target.iOS17, compute_unit=ct.ComputeUnit.CPU_ONLY, + frontend=TorchFrontend.TORCHSCRIPT, converter=ct.convert, ): # TODO(rdar://108472419): properly design a random input @@ -119,6 +129,7 @@ def run_compare_torch( use_scripting=False, compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, + frontend=frontend, converter=converter, ) @@ -431,6 +442,185 @@ def forward(self, x): else: assert get_op_types_in_program(prog) == ["constexpr_blockwise_shift_scale", "matmul"] + @pytest.mark.skipif(not _HAS_TORCHAO, reason=MSG_TORCHAO_NOT_FOUND) + @pytest.mark.parametrize( + "use_numpy, inner_k_tiles, group_size", + itertools.product([True, False], [2, 4, 8], [32, 64]), + ) + def test_unpack_int4packed_by_mm_with_eye_matrix(self, use_numpy, inner_k_tiles, group_size): + """ + Check if the packed weight could be restored by _weight_int4pack_mm with eye matrix on CPU. + + As there is no kernel implemented for CPU to unpack the data packed by `torch._convert_weight_to_int4pack`, + we use `torch._weight_int4pack_mm` to do matrix multiplication with an eye matrix to get unpacked data. + """ + if use_numpy: + y_np = numpy.random.rand(128, 128).astype(np.float32) + y = torch.from_numpy(y_np).to(torch.device("cpu")) + else: + y = torch.rand(128, 128, dtype=torch.float32, device=torch.device("cpu")) + + ( + y_quantized, + y_scales_and_zeros, + ) = torchao.quantization.utils.groupwise_affine_quantize_tensor( + y, n_bit=4, groupsize=group_size, dtype=torch.float32 + ) + y_int4packed = torch._convert_weight_to_int4pack(y_quantized, inner_k_tiles) + y_unpacked_shape = (y_int4packed.shape[0] * 8, y_int4packed.shape[1] * (inner_k_tiles * 16)) + eye_shape = y_unpacked_shape[1] + eye_matrix = torch.eye(eye_shape, device=torch.device("cpu"), dtype=torch.float32) + if Version(torch.__version__) < Version("2.4.0"): + # The `torch._weight_int4pack_mm` op requires bfloat16 before PyTorch 2.4.0. 
+ eye_matrix = eye_matrix.to(torch.bfloat16) + y_scales_and_zeros = y_scales_and_zeros.to(torch.bfloat16) + y_dequant = torch._weight_int4pack_mm( + eye_matrix, + y_int4packed, + group_size, + y_scales_and_zeros, + ) + y_dequant = y_dequant.t().contiguous().float() + + # Makes sure this `_weight_int4pack_mm` with eye matrix fully restores the original y. + np.testing.assert_allclose(y_dequant.numpy(), y.numpy(), atol=0.035, rtol=0.05) + + # Also verifies that the quantized y could be accurately reproduced by torchao utils. + scales = torch.transpose(y_scales_and_zeros[:, :, 0], 0, 1) + zero_points = torch.transpose(y_scales_and_zeros[:, :, 1], 0, 1) + block_size = (1, group_size) + y_dequant_quantized = torchao_quant.quantize_affine( + y_dequant, + block_size, + scales, + zero_points, + torch.int32, + quant_min=0, + quant_max=2**4 - 1, + zero_point_domain=torchao_quant.ZeroPointDomain.FLOAT, + ) + assert torch.equal(y_quantized, y_dequant_quantized) + + # The torchao dequantization utils should be able to recover the original y. + y_dequantized_by_torchao = torchao_quant.dequantize_affine( + y_quantized, + (1, group_size), + scales, + zero_points, + torch.int32, + quant_min=0, + quant_max=2**4 - 1, + zero_point_domain=torchao_quant.ZeroPointDomain.FLOAT, + ) + np.testing.assert_allclose(y_dequant.numpy(), y_dequantized_by_torchao.numpy(), rtol=4e-3) + + @pytest.mark.skipif( + Version(torch.__version__) < Version("2.4.0"), + reason="_weight_int4pack_mm requires bfloat16 before PyTorch 2.4.0", + ) + @pytest.mark.skipif(not _HAS_TORCHAO, reason=MSG_TORCHAO_NOT_FOUND) + @pytest.mark.parametrize( + "compute_unit, inner_k_tiles, group_size", + itertools.product(compute_units, [2, 4, 8], [32, 64]), + ) + def test_weight_int4pack_mm(self, compute_unit, inner_k_tiles, group_size): + y = torch.rand(128, 128, dtype=torch.float32, device=torch.device("cpu")) + + class Model(torch.nn.Module): + def forward(self, x): + ( + y_quantized, + y_scales_and_zeros, + ) = torchao.quantization.utils.groupwise_affine_quantize_tensor( + y, n_bit=4, groupsize=group_size, dtype=torch.float32 + ) + y_int4packed = torch._convert_weight_to_int4pack(y_quantized, inner_k_tiles) + return torch._weight_int4pack_mm(x, y_int4packed, group_size, y_scales_and_zeros) + + model = Model().to(torch.device("cpu")) + input_shape = [(2, 128)] + res = self.run_compare_torch( + input_shape, + model, + minimum_deployment_target=ct.target.iOS18, + compute_unit=compute_unit, + rtol=0.1, + ) + prog = res[1]._mil_program + assert get_op_types_in_program(prog) == ["constexpr_blockwise_shift_scale", "linear"] + + @pytest.mark.skipif( + not hasattr(torch.ops.quantized_decomposed, "embedding_4bit"), + reason="The `embedding_4bit` op doesn't exist in quantized_decomposed custom opset.", + ) + @pytest.mark.parametrize( + "compute_unit, group_size, dtype, signed", + itertools.product( + compute_units, [32, 64], [None, torch.float16, torch.float32], [False, True] + ), + ) + def test_quantized_decomposed_embedding_4bit_dtype( + self, compute_unit, group_size, dtype, signed + ): + if not signed: + # To reproduce this executorch bug, use following settings + # scales = torch.ones(size=scales_shape, dtype=torch.float32, device=torch.device("cpu")) + # input_data = torch.zeros(size=(1, 1), dtype=torch.int32) + # Then you will find coreml outputs is the expected (consistent with `unpacked_weight`). 
+ pytest.skip( + "rdar://135216194 (Executorch embedding_4bit implementation bug for unsigned quantization)" + ) + + quant_low = -8 if signed else 0 + quant_high = 7 if signed else 15 + quant_dtype = torch.int8 if signed else torch.uint8 + + weight_shape = (128, 128) + unpacked_weight = torch.randint( + low=quant_low, + high=quant_high + 1, + size=weight_shape, + dtype=quant_dtype, + ) + # Pack the weight to embedding_4bit's usable format. + weight_range_shifted = unpacked_weight.add(-quant_low).view(torch.uint8) + weight_view = weight_range_shifted.view( + unpacked_weight.shape[0], unpacked_weight.shape[1] // 2, 2 + ) + weight_even = weight_view[:, :, 0] * 16 # left shift 4 + weight_odd = weight_view[:, :, 1] + weight = weight_even + weight_odd + + scales_shape = list(weight_shape) + scales_shape[-1] = weight_shape[-1] // group_size + scales = torch.rand(*scales_shape, dtype=torch.float32) + + class Model(torch.nn.Module): + def forward(self, indices: torch.Tensor): + if dtype is not None: + return torch.ops.quantized_decomposed.embedding_4bit( + weight, scales, None, quant_low, quant_high, indices, dtype=dtype + ) + else: + return torch.ops.quantized_decomposed.embedding_4bit( + weight, scales, None, quant_low, quant_high, indices + ) + + # The 4-bit packing-unpacking in torch could be messed up when transferring between devices, so it's safer + # to specify device at the beginning. + model = Model().to(torch.device("cpu")) + input_data = torch.randint(low=0, high=weight_shape[-1], size=(2, 128), dtype=torch.int32) + res = self.run_compare_torch( + input_data, + model, + input_as_shape=False, + minimum_deployment_target=ct.target.iOS18, + compute_unit=compute_unit, + rtol=1e-3, + ) + prog = res[1]._mil_program + assert get_op_types_in_program(prog) == ["constexpr_blockwise_shift_scale", "gather"] + @pytest.mark.skipif(not _HAS_TORCH_VISION, reason=MSG_TORCH_VISION_NOT_FOUND) class TestTorchvisionQuantizedModels(TorchQuantizationBaseTest): @@ -451,30 +641,36 @@ class TestPytorchCarryCompressionInfo(TorchQuantizationBaseTest): """Test compressed PyTorch models which use register_buffer to carry compression info.""" @pytest.mark.parametrize( - "compute_unit, n_bits, signed, minimum_deployment_target", + "compute_unit, n_bits, signed, use_linear, minimum_deployment_target, frontend", itertools.product( compute_units, [4, 8], [True, False], + [True, False], [ct.target.iOS16, ct.target.iOS18], + frontends, ), ) - def test_quantization(self, compute_unit, n_bits, signed, minimum_deployment_target): + def test_quantization( + self, compute_unit, n_bits, signed, use_linear, minimum_deployment_target, frontend + ): if n_bits == 4 and minimum_deployment_target < ct.target.iOS18: pytest.skip("Sub-byte quantization is only supported since iOS18.") - model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data( + model, inputs, _, _ = get_test_model_and_data( quantize_config=cto.coreml.OpLinearQuantizerConfig( mode="linear_symmetric", dtype=types.get_nbits_int_builtin_type(n_bits, signed), granularity="per_tensor", - ) + ), + use_linear=use_linear, ) - scale = np.array([2.0], dtype=np.float32).reshape(1, 1, 1, 1) + target_scale_shape = (1, 1) if use_linear else (1, 1, 1, 1) + scale = np.array([2.0], dtype=np.float32).reshape(*target_scale_shape) zero_point = np.array( [0 if signed else 2 ** (n_bits - 1)], dtype=np.int8 if signed else np.uint8 - ).reshape(1, 1, 1, 1) + ).reshape(*target_scale_shape) model.register_buffer("_COREML_/metadata_version", torch.tensor(2)) 
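        # The "_COREML_/..." buffers are how these tests carry compression metadata to the
        # converter; the compression_type codes used throughout this file are 1 = pruning,
        # 2 = palettization, 3 = quantization, and joint schemes stack codes, e.g. torch.tensor([1, 3]).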
model.register_buffer("_COREML_/weight/compression_type", torch.tensor([3])) @@ -482,13 +678,13 @@ def test_quantization(self, compute_unit, n_bits, signed, minimum_deployment_tar model.register_buffer("_COREML_/weight/quantization_scale", torch.from_numpy(scale)) model.register_buffer("_COREML_/weight/zero_point", torch.from_numpy(zero_point)) - traced_model = torch.jit.trace(model, torch_input_values) input_shape = [input.shape.to_list() for input in inputs] res = self.run_compare_torch( input_shape, - traced_model, + model, minimum_deployment_target=minimum_deployment_target, compute_unit=compute_unit, + frontend=frontend, converter=ct.convert, rtol=1e-04, atol=1e-03, @@ -511,11 +707,11 @@ def test_quantization(self, compute_unit, n_bits, signed, minimum_deployment_tar assert types.builtin_to_string(quantize_op.zero_point.dtype) == target_dtype_str @pytest.mark.parametrize( - "compute_unit, n_bits, minimum_deployment_target", - itertools.product(compute_units, [4, 8], [ct.target.iOS16, ct.target.iOS18]), + "compute_unit, n_bits, minimum_deployment_target, frontend", + itertools.product(compute_units, [4, 8], [ct.target.iOS16, ct.target.iOS18], frontends), ) def test_multiple_parameters_in_same_layer( - self, compute_unit, n_bits, minimum_deployment_target + self, compute_unit, n_bits, minimum_deployment_target, frontend ): """Test one layer has multiple parameters (such as weight and bias in a linear layer)""" if n_bits == 4 and minimum_deployment_target < ct.target.iOS18: @@ -559,13 +755,12 @@ def forward(self, x): ) model.register_buffer("_COREML_/metadata_version", torch.tensor(2)) - torch_input_values = torch.rand((8, 16)) - traced_model = torch.jit.trace(model, torch_input_values) res = self.run_compare_torch( [(8, 16)], - traced_model, + model, minimum_deployment_target=minimum_deployment_target, compute_unit=compute_unit, + frontend=frontend, converter=ct.convert, ) main_func = res[1]._mil_program.functions["main"] @@ -579,8 +774,15 @@ def forward(self, x): linear_ops = main_func.find_ops(op_type="linear") assert linear_ops[0].weight.op.op_type == "const" assert linear_ops[0].bias.op.op_type == "const" - assert linear_ops[1].weight.op.op_type == quantize_op_type - assert linear_ops[1].bias.op.op_type == quantize_op_type + if frontend == TorchFrontend.EXECUTORCH: + # In EXECUTORCH, the second linear layer is represented by `matmul` and `add` op. 
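            # This is expected because the exported edge program decomposes aten.linear, leaving
            # matmul + add in the converted MIL program instead of a single linear op.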
+ matmul_op = main_func.find_ops(op_type="matmul")[0] + add_op = main_func.find_ops(op_type="add")[0] + assert matmul_op.y.op.op_type == quantize_op_type + assert add_op.x.op.op_type == quantize_op_type + else: + assert linear_ops[1].weight.op.op_type == quantize_op_type + assert linear_ops[1].bias.op.op_type == quantize_op_type quantize_ops = main_func.find_ops(op_type=quantize_op_type) assert len(quantize_ops) == 2 @@ -634,18 +836,28 @@ def test_invalid_compression_info(self): ) @pytest.mark.parametrize( - "compute_unit, n_bits, group_size, channel_axis, cluster_dim, minimum_deployment_target", + "compute_unit, n_bits, group_size, channel_axis, cluster_dim, use_linear, minimum_deployment_target, frontend", itertools.product( compute_units, [4, 8], [0, 1, 2], [0, 1], [1, 2], + [True, False], [ct.target.iOS16, ct.target.iOS18], + frontends, ), ) def test_palettization( - self, compute_unit, n_bits, group_size, channel_axis, cluster_dim, minimum_deployment_target + self, + compute_unit, + n_bits, + group_size, + channel_axis, + cluster_dim, + use_linear, + minimum_deployment_target, + frontend, ): if ( group_size in (0, 2) @@ -661,21 +873,35 @@ def test_palettization( pytest.skip("Cluster dim must <= group size.") model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data( - multi_layer=True + multi_layer=True, + use_linear=use_linear, ) - # per-channel scales for the [32, 64, 2, 2] and [64, 32, 2, 2] weight. - scale_1 = np.array([2.0] * 32, dtype=np.float32).reshape(32, 1, 1, 1) - scale_2 = np.array([3.0] * 64, dtype=np.float32).reshape(64, 1, 1, 1) + if use_linear: + # per-channel scales for the [32, 64] and [16, 32] weight. + scale_1 = np.array([2.0] * 32, dtype=np.float32).reshape(32, 1) + scale_2 = np.array([3.0] * 16, dtype=np.float32).reshape(16, 1) + else: + # per-channel scales for the [32, 64, 2, 2] and [64, 32, 2, 2] weight. + scale_1 = np.array([2.0] * 32, dtype=np.float32).reshape(32, 1, 1, 1) + scale_2 = np.array([3.0] * 64, dtype=np.float32).reshape(64, 1, 1, 1) + layername_1 = "linear_1" if use_linear else "conv_1" + layername_2 = "linear_2" if use_linear else "conv_2" unique_weight_1 = create_unique_weight( - model.conv_1.weight, nbits=n_bits, vector_size=cluster_dim, vector_axis=channel_axis + getattr(model, layername_1).weight, + nbits=n_bits, + vector_size=cluster_dim, + vector_axis=channel_axis, ) unique_weight_2 = create_unique_weight( - model.conv_2.weight, nbits=n_bits, vector_size=cluster_dim, vector_axis=channel_axis + getattr(model, layername_2).weight, + nbits=n_bits, + vector_size=cluster_dim, + vector_axis=channel_axis, ) - # Use grouped-channel-wise lut for conv1 for iOS18+. + # Use grouped-channel-wise lut for layer1 for iOS18+. block_sizes = [0] * len(unique_weight_1.shape) if minimum_deployment_target >= ct.target.iOS18: block_sizes[channel_axis] = group_size @@ -688,7 +914,7 @@ def test_palettization( channel_axis=channel_axis, ) - # Use per-tensor lut for conv2. + # Use per-tensor lut for layer2. 
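        # (In blockwise_compress, a block size of 0 along an axis means that whole axis forms a
        # single block, so leaving every entry at 0 yields one LUT shared by the entire tensor.)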
lut_2_params = _quantization_passes.palettize_weights.blockwise_compress( unique_weight_2, "UNIQUE", @@ -704,30 +930,38 @@ def test_palettization( unique_weight_2 *= scale_2 with torch.no_grad(): - model.conv_1.weight = torch.nn.Parameter(torch.Tensor(unique_weight_1)) - model.conv_2.weight = torch.nn.Parameter(torch.Tensor(unique_weight_2)) + getattr(model, layername_1).weight = torch.nn.Parameter(torch.Tensor(unique_weight_1)) + getattr(model, layername_2).weight = torch.nn.Parameter(torch.Tensor(unique_weight_2)) model.register_buffer("_COREML_/metadata_version", torch.tensor(1)) if minimum_deployment_target >= ct.target.iOS18: - model.conv_1.register_buffer("_COREML_/weight/compression_type", torch.tensor([2])) - model.conv_1.register_buffer("_COREML_/weight/lut", torch.tensor(lut_1_params.lut)) - model.conv_1.register_buffer( + getattr(model, layername_1).register_buffer( + "_COREML_/weight/compression_type", torch.tensor([2]) + ) + getattr(model, layername_1).register_buffer( + "_COREML_/weight/lut", torch.tensor(lut_1_params.lut) + ) + getattr(model, layername_1).register_buffer( "_COREML_/weight/palettization_scale", torch.from_numpy(scale_1) ) - model.conv_2.register_buffer("_COREML_/weight/compression_type", torch.tensor([2])) - model.conv_2.register_buffer("_COREML_/weight/lut", torch.tensor(lut_2_params.lut)) + getattr(model, layername_2).register_buffer( + "_COREML_/weight/compression_type", torch.tensor([2]) + ) + getattr(model, layername_2).register_buffer( + "_COREML_/weight/lut", torch.tensor(lut_2_params.lut) + ) if minimum_deployment_target >= ct.target.iOS18: - model.conv_2.register_buffer( + getattr(model, layername_2).register_buffer( "_COREML_/weight/palettization_scale", torch.from_numpy(scale_2) ) - traced_model = torch.jit.trace(model, torch_input_values) input_shape = [input.shape.to_list() for input in inputs] res = self.run_compare_torch( input_shape, - traced_model, + model, minimum_deployment_target=minimum_deployment_target, compute_unit=compute_unit, + frontend=frontend, converter=ct.convert, rtol=0.2 if cluster_dim > 1 else 1e-5, # Vector palettization has larger info loss. ) @@ -737,16 +971,19 @@ def test_palettization( expected_dtype = f"uint{n_bits}" expected_quantize_ops_num = 2 expected_palettize_ops_num = 2 - palettize_op_child_op_type = "constexpr_blockwise_shift_scale" + # The lut with pcs op order is determined by canonicalize_quantized_lut_pattern graph pass. + palettize_op_child_op_type = "linear" if use_linear else "conv" else: expected_dtype = "uint8" expected_quantize_ops_num = 0 expected_palettize_ops_num = 1 - # The iOS16 doesn't have per-channel-scale, so lut output is directly fed into conv. - palettize_op_child_op_type = "conv" + # The iOS16 doesn't have per-channel-scale, so lut output is directly fed into next op. 
+ palettize_op_child_op_type = "linear" if use_linear else "conv" quantize_ops = main_func.find_ops(op_type="constexpr_blockwise_shift_scale") assert len(quantize_ops) == expected_quantize_ops_num + for quantize_op in quantize_ops: + assert quantize_op.outputs[0].child_ops[0].op_type == "constexpr_lut_to_dense" palettize_ops = main_func.find_ops(op_type="constexpr_lut_to_dense") assert len(palettize_ops) == expected_palettize_ops_num for palettize_op in palettize_ops: @@ -756,10 +993,10 @@ def test_palettization( assert palettize_op.lut.shape[-1] == cluster_dim @pytest.mark.parametrize( - "compute_unit, minimum_deployment_target", - itertools.product(compute_units, [ct.target.iOS16, ct.target.iOS18]), + "compute_unit, minimum_deployment_target, frontend", + itertools.product(compute_units, [ct.target.iOS16, ct.target.iOS18], frontends), ) - def test_palettization_8bit_lut(self, compute_unit, minimum_deployment_target): + def test_palettization_8bit_lut(self, compute_unit, minimum_deployment_target, frontend): model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data( multi_layer=True ) @@ -857,36 +1094,54 @@ def test_palettization_8bit_lut(self, compute_unit, minimum_deployment_target): assert len(palettize_ops) == 2 assert types.builtin_to_string(palettize_ops[0].indices.dtype) == "uint4" assert types.builtin_to_string(palettize_ops[1].indices.dtype) == "uint6" + # The op order is adjusted by common::canonicalize_quantized_lut_pattern graph pass. + for quantize_op in quantize_ops: + assert quantize_op.outputs[0].child_ops[0].op_type == "constexpr_lut_to_dense" for palettize_op in palettize_ops: - assert palettize_op.outputs[0].child_ops[0].op_type == "constexpr_blockwise_shift_scale" + assert palettize_op.outputs[0].child_ops[0].op_type == "conv" @pytest.mark.parametrize( - "compute_unit, sparse_ratio, minimum_deployment_target", + "compute_unit, sparse_ratio, use_linear, minimum_deployment_target, frontend", itertools.product( compute_units, [0.01, 0.5, 0.99], + [True, False], [ct.target.iOS16, ct.target.iOS18], + frontends, ), ) - def test_pruning(self, compute_unit, sparse_ratio, minimum_deployment_target): + def test_pruning( + self, compute_unit, sparse_ratio, use_linear, minimum_deployment_target, frontend + ): model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data( - multi_layer=True + multi_layer=True, use_linear=use_linear ) + layername_1 = "linear_1" if use_linear else "conv_1" + layername_2 = "linear_2" if use_linear else "conv_2" + with torch.no_grad(): - model.conv_1.weight = torch.nn.Parameter( + getattr(model, layername_1).weight = torch.nn.Parameter( torch.Tensor( - create_sparse_weight(model.conv_1.weight, target_sparsity=sparse_ratio) + create_sparse_weight( + getattr(model, layername_1).weight, target_sparsity=sparse_ratio + ) ) ) - model.conv_2.weight = torch.nn.Parameter( + getattr(model, layername_2).weight = torch.nn.Parameter( torch.Tensor( - create_sparse_weight(model.conv_2.weight, target_sparsity=sparse_ratio) + create_sparse_weight( + getattr(model, layername_2).weight, target_sparsity=sparse_ratio + ) ) ) model.register_buffer("_COREML_/metadata_version", torch.tensor(1)) - model.conv_1.register_buffer("_COREML_/weight/compression_type", torch.tensor([1])) - model.conv_2.register_buffer("_COREML_/weight/compression_type", torch.tensor([1])) + getattr(model, layername_1).register_buffer( + "_COREML_/weight/compression_type", torch.tensor([1]) + ) + getattr(model, layername_2).register_buffer( + 
"_COREML_/weight/compression_type", torch.tensor([1]) + ) traced_model = torch.jit.trace(model, torch_input_values) input_shape = [input.shape.to_list() for input in inputs] @@ -902,7 +1157,7 @@ def test_pruning(self, compute_unit, sparse_ratio, minimum_deployment_target): assert len(sparse_ops) == 2 for sparse_op in sparse_ops: - assert sparse_op.outputs[0].child_ops[0].op_type == "conv" + assert sparse_op.outputs[0].child_ops[0].op_type == "linear" if use_linear else "conv" assert types.builtin_to_string(sparse_op.nonzero_data.dtype) == "fp32" if minimum_deployment_target >= ct.target.iOS18: assert types.builtin_to_string(sparse_op.mask.dtype) == "uint1" @@ -911,52 +1166,69 @@ def test_pruning(self, compute_unit, sparse_ratio, minimum_deployment_target): assert types.builtin_to_string(sparse_op.shape.dtype) == "uint32" @pytest.mark.parametrize( - "compute_unit, n_bits, signed", + "compute_unit, n_bits, signed, use_linear, frontend", itertools.product( compute_units, [4, 8], [True, False], + [True, False], + frontends, ), ) - def test_joint_pruning_quantization(self, compute_unit, n_bits, signed): + def test_joint_pruning_quantization(self, compute_unit, n_bits, signed, use_linear, frontend): model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data( multi_layer=True, + use_linear=use_linear, ) # Make the weight sparse and also quantization-friendly. + layername_1 = "linear_1" if use_linear else "conv_1" + layername_2 = "linear_2" if use_linear else "conv_2" weight_1, scale_1, zero_point_1 = create_quantize_friendly_weight( - model.conv_1.weight.detach().numpy(), nbits=n_bits, signed=signed + getattr(model, layername_1).weight.detach().numpy(), nbits=n_bits, signed=signed ) - weight_1 *= np.random.randint(low=0, high=2, size=model.conv_1.weight.shape) + weight_1 *= np.random.randint(low=0, high=2, size=weight_1.shape) weight_2, scale_2, zero_point_2 = create_quantize_friendly_weight( - model.conv_2.weight.detach().numpy(), nbits=n_bits, signed=signed + getattr(model, layername_2).weight.detach().numpy(), nbits=n_bits, signed=signed ) - weight_2 *= np.random.randint(low=0, high=2, size=model.conv_2.weight.shape) + weight_2 *= np.random.randint(low=0, high=2, size=weight_2.shape) with torch.no_grad(): - model.conv_1.weight = torch.nn.Parameter(torch.Tensor(weight_1)) - model.conv_2.weight = torch.nn.Parameter(torch.Tensor(weight_2)) + getattr(model, layername_1).weight = torch.nn.Parameter(torch.Tensor(weight_1)) + getattr(model, layername_2).weight = torch.nn.Parameter(torch.Tensor(weight_2)) model.register_buffer("_COREML_/metadata_version", torch.tensor(2)) - model.conv_1.register_buffer("_COREML_/weight/compression_type", torch.tensor([1, 3])) - model.conv_1.register_buffer("_COREML_/weight/quantization_n_bits", torch.tensor(n_bits)) - model.conv_1.register_buffer( + getattr(model, layername_1).register_buffer( + "_COREML_/weight/compression_type", torch.tensor([1, 3]) + ) + getattr(model, layername_1).register_buffer( + "_COREML_/weight/quantization_n_bits", torch.tensor(n_bits) + ) + getattr(model, layername_1).register_buffer( "_COREML_/weight/quantization_scale", torch.from_numpy(scale_1) ) - model.conv_1.register_buffer("_COREML_/weight/zero_point", torch.from_numpy(zero_point_1)) - model.conv_2.register_buffer("_COREML_/weight/compression_type", torch.tensor([1, 3])) - model.conv_2.register_buffer("_COREML_/weight/quantization_n_bits", torch.tensor(n_bits)) - model.conv_2.register_buffer( + getattr(model, layername_1).register_buffer( + 
"_COREML_/weight/zero_point", torch.from_numpy(zero_point_1) + ) + getattr(model, layername_2).register_buffer( + "_COREML_/weight/compression_type", torch.tensor([1, 3]) + ) + getattr(model, layername_2).register_buffer( + "_COREML_/weight/quantization_n_bits", torch.tensor(n_bits) + ) + getattr(model, layername_2).register_buffer( "_COREML_/weight/quantization_scale", torch.from_numpy(scale_2) ) - model.conv_2.register_buffer("_COREML_/weight/zero_point", torch.from_numpy(zero_point_2)) + getattr(model, layername_2).register_buffer( + "_COREML_/weight/zero_point", torch.from_numpy(zero_point_2) + ) - traced_model = torch.jit.trace(model, torch_input_values) input_shape = [input.shape.to_list() for input in inputs] res = self.run_compare_torch( input_shape, - traced_model, + model, minimum_deployment_target=ct.target.iOS18, compute_unit=compute_unit, + frontend=frontend, converter=ct.convert, atol=1e-2, ) @@ -976,32 +1248,39 @@ def test_joint_pruning_quantization(self, compute_unit, n_bits, signed): for sparse_op in sparse_ops: assert types.builtin_to_string(sparse_op.mask.dtype) == "uint1" assert types.builtin_to_string(sparse_op.nonzero_data.dtype) == "fp32" - assert sparse_op.outputs[0].child_ops[0].op_type == "conv" + assert sparse_op.outputs[0].child_ops[0].op_type == "linear" if use_linear else "conv" @pytest.mark.parametrize( - "compute_unit, n_bits, group_size", + "compute_unit, n_bits, group_size, use_linear, frontend", itertools.product( compute_units, [4, 8], [0, 1, 2], + [True, False], + frontends, ), ) - def test_joint_pruning_palettization(self, compute_unit, n_bits, group_size): + def test_joint_pruning_palettization( + self, compute_unit, n_bits, group_size, use_linear, frontend + ): model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data( - multi_layer=True + multi_layer=True, + use_linear=use_linear, ) # Make the weight sparse and also can be represented by lut. 
- weight_1 = create_unique_weight(model.conv_1.weight, nbits=n_bits) * np.random.randint( - low=0, high=2, size=model.conv_1.weight.shape - ) - weight_2 = create_unique_weight(model.conv_2.weight, nbits=n_bits) * np.random.randint( - low=0, high=2, size=model.conv_2.weight.shape - ) + layername_1 = "linear_1" if use_linear else "conv_1" + layername_2 = "linear_2" if use_linear else "conv_2" + weight_1 = create_unique_weight( + getattr(model, layername_1).weight, nbits=n_bits + ) * np.random.randint(low=0, high=2, size=getattr(model, layername_1).weight.shape) + weight_2 = create_unique_weight( + getattr(model, layername_2).weight, nbits=n_bits + ) * np.random.randint(low=0, high=2, size=getattr(model, layername_2).weight.shape) with torch.no_grad(): - model.conv_1.weight = torch.nn.Parameter(torch.Tensor(weight_1)) - model.conv_2.weight = torch.nn.Parameter(torch.Tensor(weight_2)) + getattr(model, layername_1).weight = torch.nn.Parameter(torch.Tensor(weight_1)) + getattr(model, layername_2).weight = torch.nn.Parameter(torch.Tensor(weight_2)) lut_1_params = _quantization_passes.palettize_weights.blockwise_compress( weight_1, @@ -1017,10 +1296,18 @@ def test_joint_pruning_palettization(self, compute_unit, n_bits, group_size): ) model.register_buffer("_COREML_/metadata_version", torch.tensor(1)) - model.conv_1.register_buffer("_COREML_/weight/compression_type", torch.tensor([1, 2])) - model.conv_1.register_buffer("_COREML_/weight/lut", torch.tensor(lut_1_params.lut)) - model.conv_2.register_buffer("_COREML_/weight/compression_type", torch.tensor([1, 2])) - model.conv_2.register_buffer("_COREML_/weight/lut", torch.tensor(lut_2_params.lut)) + getattr(model, layername_1).register_buffer( + "_COREML_/weight/compression_type", torch.tensor([1, 2]) + ) + getattr(model, layername_1).register_buffer( + "_COREML_/weight/lut", torch.tensor(lut_1_params.lut) + ) + getattr(model, layername_2).register_buffer( + "_COREML_/weight/compression_type", torch.tensor([1, 2]) + ) + getattr(model, layername_2).register_buffer( + "_COREML_/weight/lut", torch.tensor(lut_2_params.lut) + ) traced_model = torch.jit.trace(model, torch_input_values) input_shape = [input.shape.to_list() for input in inputs] @@ -1058,4 +1345,4 @@ def test_joint_pruning_palettization(self, compute_unit, n_bits, group_size): for sparse_op in sparse_ops: assert types.builtin_to_string(sparse_op.mask.dtype) == "uint1" assert types.builtin_to_string(sparse_op.nonzero_data.dtype) == "fp32" - assert sparse_op.outputs[0].child_ops[0].op_type == "conv" + assert sparse_op.outputs[0].child_ops[0].op_type == "linear" if use_linear else "conv" diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_stateful_model.py b/coremltools/converters/mil/frontend/torch/test/test_torch_stateful_model.py index a9999fbe1..01828cdef 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_stateful_model.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_stateful_model.py @@ -9,7 +9,7 @@ import pytest import coremltools as ct -from coremltools._deps import _HAS_EXECUTORCH, _HAS_TORCH_EXPORT_API +from coremltools.converters.mil.frontend.torch.utils import TorchFrontend from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.types.symbolic import any_symbolic from coremltools.converters.mil.testing_reqs import compute_units @@ -25,15 +25,7 @@ torch = pytest.importorskip("torch") -from .testing_utils import TorchFrontend, export_torch_model_to_frontend - -frontends = [TorchFrontend.TORCHSCRIPT] 
-if _HAS_TORCH_EXPORT_API or _HAS_EXECUTORCH: - frontends.append(TorchFrontend.EXIR) - -ALTER_FRONTEND = [False] -if _HAS_EXECUTORCH: - ALTER_FRONTEND.append(True) +from .testing_utils import export_torch_model_to_frontend, frontends @pytest.fixture @@ -239,16 +231,13 @@ def forward(self, x): ) class TestStateConversionAPI: @pytest.mark.parametrize( - "compute_unit, frontend, alter_frontend", - itertools.product(compute_units, frontends, ALTER_FRONTEND), + "compute_unit, frontend", + itertools.product(compute_units, frontends), ) - def test_state_model_api_example(self, compute_unit, frontend, alter_frontend): + def test_state_model_api_example(self, compute_unit, frontend): """ Test the public API example. """ - if frontend == TorchFrontend.TORCHSCRIPT and alter_frontend: - pytest.skip("Stateful conversion from torch.jit.script is not supported") - class UpdateBufferModel(torch.nn.Module): def __init__(self): super(UpdateBufferModel, self).__init__() @@ -265,18 +254,18 @@ def forward(self, x): source_model, (torch.tensor([1, 2, 3], dtype=torch.float16),), frontend, - use_scripting=alter_frontend, - use_edge_dialect=alter_frontend, ) + inputs = [ct.TensorType(shape=(3,))] if frontend == TorchFrontend.TORCHSCRIPT else None + states = ( + [ct.StateType(wrapped_type=ct.TensorType(shape=(3,)), name="state_1")] + if frontend == TorchFrontend.TORCHSCRIPT + else None + ) mlmodel = ct.convert( torch_model, - inputs=(None if frontend == TorchFrontend.EXIR else [ct.TensorType(shape=(3,))]), - states=( - None - if frontend == TorchFrontend.EXIR - else [ct.StateType(wrapped_type=ct.TensorType(shape=(3,)), name="state_1")] - ), + inputs=inputs, + states=states, minimum_deployment_target=ct.target.iOS18, convert_to="mlprogram", compute_units=compute_unit, diff --git a/coremltools/converters/mil/frontend/torch/test/testing_utils.py b/coremltools/converters/mil/frontend/torch/test/testing_utils.py index c52d84881..624bca002 100644 --- a/coremltools/converters/mil/frontend/torch/test/testing_utils.py +++ b/coremltools/converters/mil/frontend/torch/test/testing_utils.py @@ -3,6 +3,9 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import os +import platform +from pathlib import Path from typing import List, Union import numpy as np @@ -12,12 +15,18 @@ import coremltools as ct import coremltools.models.utils as coremltoolsutils -from coremltools import RangeDim, TensorType, _logger as logger +from coremltools import RangeDim, TensorType +from coremltools import _logger as logger from coremltools._deps import _HAS_EXECUTORCH, _HAS_TORCH_EXPORT_API, _IS_MACOS from coremltools.converters.mil.mil.types.type_mapping import nptype_from_builtin -from coremltools.converters.mil.testing_utils import ct_convert, validate_minimum_deployment_target +from coremltools.converters.mil.testing_utils import ( + _create_current_pytest_serialization_path, + ct_convert, + debug_save_mlmodels, + validate_minimum_deployment_target, +) -from ..utils import TORCH_DTYPE_TO_MIL_DTYPE, TorchFrontend +from ..utils import TORCH_DTYPE_TO_MIL_DTYPE, TORCH_EXPORT_BASED_FRONTENDS, TorchFrontend if _HAS_TORCH_EXPORT_API: from torch.export import ExportedProgram @@ -25,6 +34,34 @@ if _HAS_EXECUTORCH: import executorch.exir +if "TORCH_FRONTENDS" in os.environ: + frontends = [] + for frontend_str in os.environ["TORCH_FRONTENDS"].split(","): + frontend = TorchFrontend[frontend_str] + if platform.machine() == "x86_64" and frontend in 
TORCH_EXPORT_BASED_FRONTENDS: + logger.warning( + f"{frontend_str} is not supported well on x86_64, skipped this frontend test" + ) + continue + if frontend == TorchFrontend.TORCHEXPORT and not _HAS_TORCH_EXPORT_API: + logger.warning( + "Must have torch.export API to test TORCHEXPORT frontend. Skipped this frontend test." + ) + continue + if frontend == TorchFrontend.EXECUTORCH and not _HAS_EXECUTORCH: + logger.warning( + "Must have executorch to test EXECUTORCH frontend. Skipped this frontend test." + ) + continue + frontends.append(frontend) +else: + frontends = [TorchFrontend.TORCHSCRIPT] + if platform.machine() != "x86_64": + if _HAS_TORCH_EXPORT_API: + frontends.append(TorchFrontend.TORCHEXPORT) + if _HAS_EXECUTORCH: + frontends.append(TorchFrontend.EXECUTORCH) + class ModuleWrapper(nn.Module): """ @@ -157,7 +194,6 @@ def export_torch_model_to_frontend( input_data, frontend, use_scripting=False, - use_edge_dialect=True, torch_export_dynamic_shapes=None, ): input_data_clone = _copy_input_data(input_data) @@ -173,7 +209,7 @@ def export_torch_model_to_frontend( else: model_spec = torch.jit.trace(model, input_data_clone) - elif frontend == TorchFrontend.EXIR: + elif frontend in TORCH_EXPORT_BASED_FRONTENDS: try: model.eval() except NotImplementedError: @@ -182,13 +218,13 @@ def export_torch_model_to_frontend( model_spec = torch.export.export( model, input_data_clone, dynamic_shapes=torch_export_dynamic_shapes ) - if use_edge_dialect: + if frontend == TorchFrontend.EXECUTORCH: model_spec = executorch.exir.to_edge(model_spec).exported_program() else: raise ValueError( "Unknown value of frontend. Needs to be either TorchFrontend.TORCHSCRIPT " - f"or TorchFrontend.EXIR. Provided: {frontend}" + f"or TorchFrontend.TORCHEXPORT or TorchFrontend.EXECUTORCH. Provided: {frontend}" ) return model_spec @@ -240,6 +276,15 @@ def convert_and_compare( torch_input = _copy_input_data(input_data) expected_results = torch_model(*torch_input) expected_results = flatten_and_detach_torch_results(expected_results) + + PYTEST_CURRENT_TEST = os.environ.get("PYTEST_CURRENT_TEST").split("(call)")[0].strip() + if PYTEST_CURRENT_TEST in debug_save_mlmodels: + serialization_path = _create_current_pytest_serialization_path() + Path(serialization_path).mkdir(parents=True, exist_ok=True) + flat_inputs = flatten_and_detach_torch_results(input_data) + np.savez(serialization_path + "ref_inputs.npz", *flat_inputs) + np.savez(serialization_path + "ref_outputs.npz", *expected_results) + mlmodel = convert_to_mlmodel( model_spec, input_data, @@ -294,9 +339,6 @@ def run_compare_torch( backend=("neuralnetwork", "fp32"), rand_range=(-1.0, 1.0), use_scripting=False, - # TODO (rdar://128768037): Once we fully figure out torch.export converter, - # we may default the tests to ATen dialect - use_edge_dialect=True, converter_input_type=None, compute_unit=ct.ComputeUnit.CPU_ONLY, minimum_deployment_target=None, @@ -312,7 +354,7 @@ def run_compare_torch( expected_results : Expected result from running pytorch model. converter_input_type: If not None, then pass it to the "inputs" argument to the ct.convert() call. 
- frontend: Either TorchFrontend.TORCHSCRIPT or TorchFrontend.EXIR + frontend: TorchFrontend enum """ if minimum_deployment_target is not None: validate_minimum_deployment_target(minimum_deployment_target, backend) @@ -325,7 +367,6 @@ def run_compare_torch( input_data, frontend, use_scripting=use_scripting, - use_edge_dialect=use_edge_dialect, torch_export_dynamic_shapes=torch_export_dynamic_shapes, ) diff --git a/coremltools/converters/mil/frontend/torch/utils.py b/coremltools/converters/mil/frontend/torch/utils.py index e3b4da127..3507bf24e 100644 --- a/coremltools/converters/mil/frontend/torch/utils.py +++ b/coremltools/converters/mil/frontend/torch/utils.py @@ -118,7 +118,11 @@ def dtype_to_32bit(dtype): class TorchFrontend(Enum): TORCHSCRIPT = 1 - EXIR = 2 + TORCHEXPORT = 2 + EXECUTORCH = 3 + + +TORCH_EXPORT_BASED_FRONTENDS = (TorchFrontend.TORCHEXPORT, TorchFrontend.EXECUTORCH) def sanitize_op_kind(op_kind: str) -> str: @@ -141,14 +145,21 @@ def skip_default_prefix_and_suffix_with_deliminator( ) -> str: split = op_kind.split(deliminator) start = 1 if split[0] in {"aten", "prim"} and len(split) > 1 else 0 - stop = -1 if split[-1] in { - "default", - "tensor", - "tensor_mode", - "scalar", - "tensor_scalar", - } and len(split) - start > 1 else len(split) - op_kind = deliminator.join(split[start : stop]) + stop = ( + -1 + if split[-1] + in { + "default", + "int", + "tensor", + "tensor_mode", + "scalar", + "tensor_scalar", + } + and len(split) - start > 1 + else len(split) + ) + op_kind = deliminator.join(split[start:stop]) return op_kind # 1. Lower case diff --git a/coremltools/converters/mil/mil/operation.py b/coremltools/converters/mil/mil/operation.py index f7fc9cee9..624b10737 100644 --- a/coremltools/converters/mil/mil/operation.py +++ b/coremltools/converters/mil/mil/operation.py @@ -498,8 +498,8 @@ def check_and_detach(v_new, v_old, op, no_check_var_types): and not no_check_var_types ): raise ValueError( - f"New var type `{v_new.sym_type}` not a " - f"subtype of existing var type `{v_old.sym_type}`." + f"New var {v_new} doesn't have compatible " + f"subtype of existing var `{v_old}`." ) v_old.remove_child_op(op, no_check_var_types) diff --git a/coremltools/converters/mil/mil/ops/defs/iOS18/compression.py b/coremltools/converters/mil/mil/ops/defs/iOS18/compression.py index c89b2be90..2f5f59f62 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS18/compression.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS18/compression.py @@ -97,8 +97,8 @@ def _validate_shift_scale_inputs( scale_dim = scale.shape[rank_idx] if data_dim % scale_dim != 0: raise ValueError( - f"Number of scales along each dimension should be a factor of " - f"corresponding dimension size of 'data'. However, at dim " + "Number of scales along each dimension should be a factor of " + "corresponding dimension size of 'data'. However, at dim " f"{rank_idx}, the 'data' has {data_dim} while 'scale' has {scale_dim}." 
) diff --git a/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py b/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py index 58e21c3c9..11c1b5167 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py @@ -63,6 +63,37 @@ def build(x): prog = mlmodel._mil_program assert "constexpr_affine_dequantize" in get_op_types_in_program(prog) + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_builder_to_backend_linear(self, compute_unit, backend): + input_data = np.ones((4, 64), dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=input_data.shape), + } + input_values = {"x": input_data} + + def build(x): + weight = mb.constexpr_affine_dequantize( + quantized_data=np.ones((32, 64), dtype=np.uint8), + zero_point=np.uint8(0), + scale=np.float32(2.0), + axis=0, + ) + return mb.linear(x=x, weight=weight, bias=np.zeros((32,), dtype=np.float32)) + + expected_output_types = (4, 32, types.fp32) + expected_outputs = np.ones((4, 32), dtype=np.float32) * 128 + + mlmodel = run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + compute_unit=compute_unit, + backend=backend, + ) + assert "constexpr_affine_dequantize" in get_op_types_in_program(mlmodel._mil_program) + def test_is_all_zeros(self): @mb.program(opset_version=ct.target.iOS16) def prog_0_scalar(): diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py b/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py index 5267e8e9e..545c768cb 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py @@ -4,7 +4,7 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import hashlib -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Union import numpy as np @@ -46,14 +46,32 @@ class const_deduplication(AbstractGraphPass): (2) Deduplication of ``constexpr_*`` op: We consider a ``constexpr_*`` as duplicated if there exists such a previous ``constexpr_*`` that has the same ``op_type`` and input attributes. + + Support options: + + - ``const_threshold``: Skip deduplicating ``const`` ops that have smaller number of elements than a threshold. Defaults to ``100``. i.e. the constants with ``size < 100`` will not be deduplicated. """ - NUMEL_THRESH = 100 + # const with size < _const_threshold will not be deduplicated + _const_threshold = 100 + + # length of the number value hashkey + LENGTH_OF_HASHKEY = 100 DTYPE2ATOL = { types.fp16: 6e-8, types.fp32: 1e-12, } + @property + def const_threshold(self) -> int: + return const_deduplication._const_threshold + + @const_threshold.setter + def const_threshold(self, val: int) -> None: + if not isinstance(val, int): + raise ValueError(f"Expect option 'const_threshold' to be type of int. 
Got {type(val)}.") + const_deduplication._const_threshold = val + def apply(self, prog) -> None: for f in prog.functions.values(): self._constant_deduplication_block(f) @@ -140,10 +158,10 @@ def find_constexprs(blocks: List[Block]) -> Dict[Var, List[Var]]: hash_key = [op.op_type] for v in op.inputs.values(): hash_key.append(v.dtype) - if np.prod(v.shape) < const_deduplication.NUMEL_THRESH: - hash_key.append(str(v.val)) - else: + if v.val is None or const_deduplication.should_be_deduplicated(v.val): hash_key.append(v) + else: + hash_key.append(str(v.val)) hash_key = tuple(hash_key) if hash_key not in hashkey_2_duplicates: hashkey_2_duplicates[hash_key] = [op.outputs[0]] @@ -152,6 +170,15 @@ def find_constexprs(blocks: List[Block]) -> Dict[Var, List[Var]]: return {v[0]: v[1:] for v in hashkey_2_duplicates.values()} + @staticmethod + def should_be_deduplicated(val: Union[str, bool, np.ndarray]) -> bool: + assert val is not None, "val should only be type of (str, bool, np.ndarray)" + if isinstance(val, (str, bool)): + return False + if np.prod(val.shape) < const_deduplication._const_threshold: + return False + return True + @staticmethod def find_constants(blocks: List[Block]) -> Dict[Var, List[Var]]: """ @@ -173,16 +200,16 @@ def find_constants(blocks: List[Block]) -> Dict[Var, List[Var]]: constant_var = op.outputs[0] if isinstance(constant_var, ListVar): continue - shape = constant_var.shape - numel = np.prod(shape) - if numel < const_deduplication.NUMEL_THRESH: + if not const_deduplication.should_be_deduplicated(constant_var.val): continue + shape = constant_var.shape dtype = constant_var.dtype value = constant_var.val + hash = hashlib.sha1( - np.ascontiguousarray(value.reshape(-1)[: const_deduplication.NUMEL_THRESH]) + np.ascontiguousarray(value.reshape(-1)[: const_deduplication.LENGTH_OF_HASHKEY]) ).hexdigest() if hasattr(op, "weight_key"): key = (op.weight_key, dtype, shape, hash) diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py b/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py index 216ae8937..ddd147248 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py @@ -427,7 +427,10 @@ def _try_to_transform(op, block): # check the expand_dim op has axes = [0] expand_dims_op = expand_dims_ops[0] - if expand_dims_op.axes.val != [0]: + expand_dims_op_axes_val = expand_dims_op.axes.val + if isinstance(expand_dims_op_axes_val, np.ndarray): + expand_dims_op_axes_val = expand_dims_op_axes_val.tolist() + if expand_dims_op_axes_val != [0]: return False ops_to_remove.append(expand_dims_op) ops_to_remove += other_ops diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py b/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py index 220b5ea8f..a0f177c61 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py @@ -1120,3 +1120,115 @@ def _help_move_scale( new_var=scaled_output, force_replace=True, # Need to force replace because it involves replacing constexpr op. ) + + +@register_pass(namespace="common") +class canonicalize_quantized_lut_pattern(AbstractGraphPass): + """ + The quantized lut (e.g. 
each entry in the LUT is int8) could be represented by two patterns: + Pattern 1: + lut(int8) -> constexpr_blockwise_shift_scale -> lut(fp16) -> constexpr_lut_to_dense -> dense(fp16) + Pattern 2: + lut(int8) -> constexpr_lut_to_dense -> dense(int8) -> constexpr_blockwise_shift_scale -> dense(fp16) + Those two patterns are mathematically equivalent when the quantization is per-tensor or per-channel. + + This graph pass makes sure we always use one specific pattern by re-ordering the ops. + """ + + _DEQUANT_FIRST = True # First dequantize and then depalettize (use pattern 1). + + def apply(self, prog): + wrong_order_op1 = ( + "constexpr_lut_to_dense" if self._DEQUANT_FIRST else "constexpr_blockwise_shift_scale" + ) + wrong_order_op2 = ( + "constexpr_blockwise_shift_scale" if self._DEQUANT_FIRST else "constexpr_lut_to_dense" + ) + + @block_context_manager + def apply_block(block: Block): + for op in list(block.operations): + for b in op.blocks: + apply_block(b) + if op.op_type == wrong_order_op1 and len(op.outputs[0].child_ops) == 1: + if op.outputs[0].child_ops[0].op_type == wrong_order_op2: + self._reorder_quant_lut(block, op) + + for f in prog.functions.values(): + apply_block(f) + + def _reorder_quant_lut(self, block: Block, old_op1: Operation): + """ + Original order is op1 -> op2 -> output_op, and after reorder it becomes op2 -> op1 -> output_op. + Here op1 and op2 corresponds to either lut op or quant op, depending on `_DEQUANT_FIRST`. + """ + old_op2 = old_op1.outputs[0].child_ops[0] + # If the old op has some meaningful info in the name (such as "conv1.weight"), we need to keep it. + new_op1_name = None if old_op1.op_type in old_op1.name else old_op1.name + new_op2_name = None if old_op2.op_type in old_op2.name else old_op2.name + + if old_op1.op_type == "constexpr_blockwise_shift_scale": + # The old_op1 is dequant op and old_op2 is a lut op. + # The scale and offset from old_op1 is for lut, so the rank need to be adjusted. + if old_op1.scale.shape[-2:] != (1, 1): + raise AssertionError( + "The quantization on lut must be per-tensor, so last two dims in `scale` should " + f"both be 1, but got scale with shape {old_op1.scale.shape}." + ) + new_scale_shape = old_op1.scale.shape[-2:] + scale = old_op1.scale.val.reshape(new_scale_shape) + offset = old_op1.offset + if offset is not None and offset.val is not None: + offset = old_op1.offset.val.reshape(new_scale_shape) + + new_op1_args = {"indices": old_op2.indices, "lut": old_op1.data, "before_op": old_op2} + if new_op1_name is not None: + new_op1_args["name"] = new_op1_name + new_op1 = mb.constexpr_lut_to_dense(**new_op1_args) + + new_op2_args = {"data": new_op1, "scale": scale, "offset": offset, "before_op": old_op2} + if new_op2_name is not None: + new_op2_args["name"] = new_op2_name + new_op2 = mb.constexpr_blockwise_shift_scale(**new_op2_args) + else: + # The old_op1 is lut op and old_op2 is a dequant op. + # The scale and offset from old_op2 is for depalettized weight, so the rank need to be adjusted to match + # the lut's rank. + new_scale_shape = old_op2.scale.shape + (1, 1) + scale = old_op2.scale.val.reshape(new_scale_shape) + offset = old_op2.offset + if offset is not None and offset.val is not None: + offset = old_op2.offset.val.reshape(new_scale_shape) + + lut = old_op1.lut + if any(shape != 1 for shape in new_scale_shape): + # The lut need to be repeated when necessary. For example, in per-channel-scale, the lut has shape + # [16, 1, 16, 1], indices has shape [32, 1], and scale has shape [32, 1]. 
It means every two rows in + # the weight share a lut, and it's impossible to apply 32 scales to 16 lut tables. So we need to repeat + # the lut to become [32, 1, 16, 1], and then apply those 32 scales to each row. + lut = old_op1.lut.val + if lut is None: + return # Cannot handle the reording when the lut is not const. + for axis, (scale_shape, lut_shape) in enumerate(zip(new_scale_shape, lut.shape)): + if scale_shape > lut_shape: + if scale_shape % lut_shape != 0: + return # Skip when lut's shape cannot be repeated to match scale's shape. + lut = np.repeat(lut, scale_shape // lut_shape, axis=axis) + + new_op1_args = {"data": lut, "scale": scale, "offset": offset, "before_op": old_op1} + if new_op1_name is not None: + new_op1_args["name"] = new_op1_name + new_op1 = mb.constexpr_blockwise_shift_scale(**new_op1_args) + + new_op2_args = {"indices": old_op1.indices, "lut": new_op1, "before_op": old_op1} + if new_op2_name is not None: + new_op2_args["name"] = new_op2_name + new_op2 = mb.constexpr_lut_to_dense(**new_op2_args) + + block.replace_uses_of_var_after_op( + anchor_op=old_op2, + old_var=old_op2.outputs[0], + new_var=new_op2, + force_replace=True, # Need to force replace because it involves replacing constexpr op. + ) + block.remove_ops([old_op1, old_op2]) diff --git a/coremltools/converters/mil/mil/passes/defs/quantization.py b/coremltools/converters/mil/mil/passes/defs/quantization.py index 6fa20e947..a91edbde4 100644 --- a/coremltools/converters/mil/mil/passes/defs/quantization.py +++ b/coremltools/converters/mil/mil/passes/defs/quantization.py @@ -9,6 +9,7 @@ import numpy as np +from coremltools import _logger as logger from coremltools.converters.mil._deployment_compatibility import AvailableTarget from coremltools.converters.mil.input_types import TensorType from coremltools.converters.mil.mil import Block @@ -293,6 +294,11 @@ def transform_op(self, op) -> None: len(var._child_ops) > 1 and casted_var_name in self.current_cache_vars() ): + if self.current_cache_vars()[casted_var_name].op.x != var: + logger.warning( + "The cached cast Var doesn't match the original Var. It's due to duplicated Var " + f"names in the graph for {casted_var_name}." + ) casted_inputs[param][i] = self.current_cache_vars()[casted_var_name] else: x = mb.cast( diff --git a/coremltools/converters/mil/mil/passes/pass_pipeline.py b/coremltools/converters/mil/mil/passes/pass_pipeline.py index 2ffcedc48..2446ba07d 100644 --- a/coremltools/converters/mil/mil/passes/pass_pipeline.py +++ b/coremltools/converters/mil/mil/passes/pass_pipeline.py @@ -34,6 +34,7 @@ # after all quantization passes, since constexpr will not be further optimized # before const elimination, otherwise const dequantize would get bloated "common::dequantize_to_constexpr", + "common::canonicalize_quantized_lut_pattern", "common::const_elimination", "common::sanitize_input_output_names", "common::divide_to_multiply", @@ -93,6 +94,7 @@ # in the network (while reducing the total number of transposes), and after passes such as "fuse_layernorm_or_instancenorm" # which detects patterns that involve redundant ops ("sub") etc. "common::remove_redundant_ops", + "common::dedup_op_and_var_names", # Must be applied before "add_fp16_cast" because "add_fp16_cast" use unique name cache. "common::add_fp16_cast", # Will be removed if compute precision is not FP16. "common::add_int16_cast", # Will be removed if compute precision is not FP16. "common::update_output_dtypes", # Must run again after `add_fp16_cast` and `add_int16_cast`. 
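To make the equivalence described in the `canonicalize_quantized_lut_pattern` docstring above concrete, here is a minimal NumPy sketch of the two orderings for a per-tensor quantized LUT. It is an illustration only, not the MIL `constexpr_*` ops themselves: the toy shapes and values are made up, and it assumes a dequantization of the form `scale * (data - offset)`.

```python
import numpy as np

# Toy per-tensor quantized LUT: 16 int8 palette entries, 4-bit indices (assumed sizes).
nbits = 4
lut_int8 = np.random.randint(-6, 6, size=(2**nbits,)).astype(np.int8)
indices = np.random.randint(0, 2**nbits, size=(8, 4))
scale, offset = np.float16(2.0), np.int8(1)

# Pattern 1: dequantize the LUT first, then palettize (look up) into a dense fp16 weight.
lut_fp16 = (lut_int8.astype(np.float16) - offset) * scale
dense_1 = lut_fp16[indices]

# Pattern 2: palettize first into a dense int8 weight, then dequantize.
dense_int8 = lut_int8[indices]
dense_2 = (dense_int8.astype(np.float16) - offset) * scale

# The two orderings agree exactly for per-tensor (and per-channel) quantization,
# which is what lets the graph pass re-order the ops into one canonical form.
assert np.array_equal(dense_1, dense_2)
```

Once the scale becomes blockwise and its blocks no longer line up with whole LUT tables, the two forms stop being interchangeable as-is, which is why the pass repeats the LUT before moving a per-channel scale across it.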
diff --git a/coremltools/converters/mil/mil/passes/tests/test_cleanup_passes.py b/coremltools/converters/mil/mil/passes/tests/test_cleanup_passes.py index 87f54ceed..a1a7d7785 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_cleanup_passes.py +++ b/coremltools/converters/mil/mil/passes/tests/test_cleanup_passes.py @@ -150,6 +150,138 @@ def func(x): assert const_ops[1].weight_id == 1 assert const_ops[2].weight_id == 2 + @staticmethod + def test_const_deduplication_with_threshold(): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(2,)), + ] + ) + def prog(x): + # const_1 and const_2 will not be deduplicated + const_1 = [0.0] + const_2 = [0.0] + const_3 = [0.0, 1.0] + const_4 = [0.0, 1.0] + + # 4 add ops + x = mb.add(x=x, y=const_1) + x = mb.add(x=x, y=const_2) + x = mb.add(x=x, y=const_3) + return mb.add(x=x, y=const_4) + + graph_pass = PASS_REGISTRY["common::const_deduplication"] + graph_pass.const_threshold = 2 + apply_pass_and_basic_check(prog, graph_pass) + + # check the graph pass + assert_op_count_match(prog, expect=3, op="const") + const_ops = prog.functions["main"].find_ops(op_type="const") + assert const_ops[0].outputs[0].val.tolist() == [0.0] + assert const_ops[1].outputs[0].val.tolist() == [0.0] + assert const_ops[2].outputs[0].val.tolist() == [0.0, 1.0] + + @staticmethod + def test_const_deduplication_with_threshold_for_pad(): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(100,)), + ] + ) + def prog(x): + # both constant_val and pad inputs for two pad ops are deduplicaed + c_zero_scalar = np.float32(0.0) + x = mb.pad(x=x, pad=[1, 0], mode="constant", constant_val=c_zero_scalar) + return mb.pad(x=x, pad=[1, 0], mode="constant", constant_val=c_zero_scalar) + + graph_pass = PASS_REGISTRY["common::const_deduplication"] + graph_pass.const_threshold = -1 + apply_pass_and_basic_check(prog, graph_pass) + + # check the graph pass + assert_op_count_match(prog, expect=4, op="const") + const_ops = prog.functions["main"].find_ops(op_type="const") + assert const_ops[0].outputs[0].val.tolist() == [1, 0] + assert const_ops[1].outputs[0].val == "constant" + assert const_ops[2].outputs[0].val == 0.0 + assert const_ops[3].outputs[0].val == "constant" + + @staticmethod + @pytest.mark.parametrize( + "constexpr_op", + CONSTEXPR_OPS, + ) + def test_constexpr_deduplication_with_threshold(constexpr_op): + BATCH_DIM = 1 + SEQUENCE_LENGTH = 1 + ENCODING_DIM = 1 + EMBEDDING_DIM = 2 + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), + mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), + ] + ) + def prog(q, k): + weight_q = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM, ENCODING_DIM), seed=19) + weight_k = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM, ENCODING_DIM), seed=19) + q_e = mb.linear(x=q, weight=weight_q) + k_e = mb.linear(x=k, weight=weight_k) + return mb.matmul(x=q_e, y=k_e, transpose_y=True) + + graph_pass = PASS_REGISTRY["common::const_deduplication"] + graph_pass.const_threshold = -1 + apply_pass_and_basic_check(prog, graph_pass) + + # check the graph pass + assert_op_count_match(prog, expect=1, op=constexpr_op) + + @staticmethod + def test_str_should_not_be_deduplicated(): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1,)), + ] + ) + def prog(x): + x = mb.cast(x=x, dtype="int32") + return mb.cast(x=x, dtype="int32") + + graph_pass = PASS_REGISTRY["common::const_deduplication"] + graph_pass.const_threshold = -1 + apply_pass_and_basic_check(prog, graph_pass) + + # check the graph pass + 
assert_op_count_match(prog, expect=2, op="const") + const_ops = prog.functions["main"].find_ops(op_type="const") + assert const_ops[0].outputs[0].val == "int32" + assert const_ops[1].outputs[0].val == "int32" + + @staticmethod + def test_bool_should_not_be_deduplicated(): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(2,)), + mb.TensorSpec(shape=(2,)), + ] + ) + def prog(x, y): + return mb.argsort(x=x, axis=-1, ascending=False), mb.argsort( + x=y, axis=-1, ascending=False + ) + + graph_pass = PASS_REGISTRY["common::const_deduplication"] + graph_pass.const_threshold = -1 + apply_pass_and_basic_check(prog, graph_pass) + + # check the graph pass + assert_op_count_match(prog, expect=3, op="const") + const_ops = prog.functions["main"].find_ops(op_type="const") + assert const_ops[0].outputs[0].val == -1 + assert const_ops[1].outputs[0].val == False + assert const_ops[2].outputs[0].val == False + @pytest.mark.parametrize( "q_weight_key, k_weight_key", itertools.product( diff --git a/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py b/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py index b03c8e46e..8ec2d271a 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py +++ b/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py @@ -2017,6 +2017,118 @@ def prog(x): assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) +@pytest.mark.skipif(ct.utils._macos_version() < (15, 0), reason="Only supported on macOS 15+") +class TestReorderQuantizedLut: + @staticmethod + def _verify_numerical(prev_prog, prog, block, input_shape, rtol=1e-7, atol=0.0): + # Verify the numerical output matches between `prev_prog` and `prog`. + prev_model = ct.convert( + prev_prog, + pass_pipeline=ct.PassPipeline.EMPTY, + convert_to="mlprogram", + minimum_deployment_target=ct.target.iOS18, + ) + model = ct.convert( + prog, + pass_pipeline=ct.PassPipeline.EMPTY, + convert_to="mlprogram", + minimum_deployment_target=ct.target.iOS18, + ) + output_name = block.outputs[0].name + x_val = np.random.rand(*input_shape).astype(np.float16) + input_dict = {"x": x_val} + prev_output = prev_model.predict(input_dict)[output_name] + output = model.predict(input_dict)[output_name] + np.testing.assert_allclose(prev_output, output, rtol=rtol, atol=atol) + + @staticmethod + def _construct_weights_with_two_orders(weight_shape: Tuple[int, ...]): + """Construct two quantized lut weights, represented in different quant/lut orders.""" + nbits = 4 + num_palette = 2**nbits + indices_np_dtype = types.nptype_from_builtin(types.string_to_builtin(f"uint{nbits}")) + indices = np.random.randint(low=0, high=num_palette, size=weight_shape).astype( + indices_np_dtype + ) + lut_shape = weight_shape + (num_palette, 1) + int8_lut = np.random.randint(low=0, high=6, size=lut_shape, dtype=np.int8) + scale = np.float16(2.0).reshape([1] * len(weight_shape)) + offset = np.int8(1).reshape([1] * len(weight_shape)) + + lut_weight1 = mb.constexpr_lut_to_dense(indices=indices, lut=int8_lut) + quantized_lut_weight1 = mb.constexpr_blockwise_shift_scale( + data=lut_weight1, scale=scale, offset=offset + ) + quantized_weight2 = mb.constexpr_blockwise_shift_scale( + data=int8_lut, + scale=scale.reshape([1] * len(int8_lut.shape)), + offset=offset.reshape([1] * len(int8_lut.shape)), + ) + quantized_lut_weight2 = mb.constexpr_lut_to_dense(indices=indices, lut=quantized_weight2) + + return quantized_lut_weight1, quantized_lut_weight2 + + @pytest.mark.parametrize( + 
"input_shape, dequant_first", itertools.product([(4, 3), (2, 3, 4)], [True, False]) + ) + def test_dequant_first(self, input_shape, dequant_first): + """ + When dequant_first is True, the quantized lut ops representation will be reordered to follow + lut(int8) -> constexpr_blockwise_shift_scale -> lut(fp16) -> constexpr_lut_to_dense -> dense(fp16). + When dequant_first is False, the quantized lut ops representation will be reordered to follow + lut(int8) -> constexpr_lut_to_dense -> dense(int8) -> constexpr_blockwise_shift_scale -> dense(fp16) + """ + + @mb.program( + input_specs=[mb.TensorSpec(shape=input_shape, dtype=types.fp16)], + opset_version=ct.target.iOS18, + ) + def prog(x): + quantized_lut_weight1, quantized_lut_weight2 = self._construct_weights_with_two_orders( + weight_shape=(8, input_shape[-1]) + ) + output1 = mb.linear(x=x, weight=quantized_lut_weight1) + output2 = mb.linear(x=x, weight=quantized_lut_weight2) + return mb.add(x=output1, y=output2) + + from unittest import mock + + from coremltools.converters.mil.mil.passes.defs.optimize_quantization import ( + canonicalize_quantized_lut_pattern, + ) + + with mock.patch.object(canonicalize_quantized_lut_pattern, "_DEQUANT_FIRST", dequant_first): + prev_prog, _, block = apply_pass_and_basic_check( + prog, "common::canonicalize_quantized_lut_pattern", skip_essential_scope_check=True + ) + + assert get_op_types_in_program(prev_prog) == [ + "constexpr_lut_to_dense", + "constexpr_blockwise_shift_scale", + "constexpr_blockwise_shift_scale", + "constexpr_lut_to_dense", + "linear", + "linear", + "add", + ] + dequant_ops = prog.functions["main"].find_ops(op_type="constexpr_blockwise_shift_scale") + lut_ops = prog.functions["main"].find_ops(op_type="constexpr_lut_to_dense") + assert len(dequant_ops) == 2 + assert len(lut_ops) == 2 + if dequant_first: + for dequant_op in dequant_ops: + assert dequant_op.outputs[0].child_ops[0].op_type == "constexpr_lut_to_dense" + for lut_op in lut_ops: + assert lut_op.outputs[0].child_ops[0].op_type == "linear" + else: + for lut_op in lut_ops: + assert lut_op.outputs[0].child_ops[0].op_type == "constexpr_blockwise_shift_scale" + for dequant_op in dequant_ops: + assert dequant_op.outputs[0].child_ops[0].op_type == "linear" + + self._verify_numerical(prev_prog, prog, block, input_shape) + + class TestFP16CastTransform: def assertEqual(self, first, second): """A convenience method to migrate from unittest (self.assertEqual) to pytest.""" diff --git a/coremltools/converters/mil/testing_utils.py b/coremltools/converters/mil/testing_utils.py index bc23279dc..6261f8fe9 100644 --- a/coremltools/converters/mil/testing_utils.py +++ b/coremltools/converters/mil/testing_utils.py @@ -115,20 +115,15 @@ def macos_compatible_with_deployment_target(minimum_deployment_target): return False return True -def _serialize_current_pytest(mlmodel): - """ - Usually pytest test name is of format file::class::test_function[param0-param1] (call)... 
- Assume each test produces only one Core ML model, - then file::class::test_function[param0-param1] is enough to determine unique name - {_COREMLTOOLS_DEBUG_SAVE_MLMODEL_DIRECTORY}/file/class/test_function/param0/param1/model.mlpackage - """ - mlpackage_path = _COREMLTOOLS_DEBUG_SAVE_MLMODEL_DIRECTORY + "/" + +def _create_current_pytest_serialization_path() -> str: + serialization_path = _COREMLTOOLS_DEBUG_SAVE_MLMODEL_DIRECTORY + "/" PYTEST_CURRENT_TEST = os.environ.get("PYTEST_CURRENT_TEST").split("(call)")[0].strip() test_name_fragments = PYTEST_CURRENT_TEST.split("::") for test_name_fragment in test_name_fragments[:-1]: - mlpackage_path += f"{test_name_fragment.strip()}/" + serialization_path += f"{test_name_fragment.strip()}/" test_name = test_name_fragments[-1] # For a parameterized test, further decompose parameters into directories @@ -138,17 +133,27 @@ def _serialize_current_pytest(mlmodel): test_function_name = test_name[:bra_index] parameters = test_name[bra_index + 1 : -1].split("-") # Append test function name and parameter to mlpackage path - mlpackage_path += f"{test_function_name}/" + serialization_path += f"{test_function_name}/" for parameter in parameters: - mlpackage_path += f"{parameter}/" + serialization_path += f"{parameter}/" else: - mlpackage_path += f"{test_name}/" + serialization_path += f"{test_name}/" - mlpackage_path += "model.mlpackage" + return serialization_path + +def _serialize_current_pytest_mlmodel(mlmodel) -> None: + """ + Usually pytest test name is of format file::class::test_function[param0-param1] (call)... + Assume each test produces only one Core ML model, + then file::class::test_function[param0-param1] is enough to determine unique name + {_COREMLTOOLS_DEBUG_SAVE_MLMODEL_DIRECTORY}/file/class/test_function/param0/param1/model.mlpackage + """ + mlpackage_path = _create_current_pytest_serialization_path() + "model.mlpackage" Path(mlpackage_path).mkdir(parents=True, exist_ok=True) mlmodel.save(mlpackage_path) + def assert_op_count_match(program, expect, op=None, verbose=False): """ Assert number of ops match expected number. 
If op is not specified, @@ -531,20 +536,20 @@ def ct_convert( skip_model_load = True mlmodel = converter( - program, - source=source, - inputs=inputs, - outputs=outputs, - classifier_config=classifier_config, - minimum_deployment_target=minimum_deployment_target, - convert_to=target, - compute_precision=compute_precision, - skip_model_load=skip_model_load, - **kwargs + program, + source=source, + inputs=inputs, + outputs=outputs, + classifier_config=classifier_config, + minimum_deployment_target=minimum_deployment_target, + convert_to=target, + compute_precision=compute_precision, + skip_model_load=skip_model_load, + **kwargs, ) if is_current_test_to_be_debugged: - _serialize_current_pytest(mlmodel) + _serialize_current_pytest_mlmodel(mlmodel) pytest.xfail("This test is to be debugged") return mlmodel diff --git a/coremltools/models/_compiled_model.py b/coremltools/models/_compiled_model.py index 0539f4433..2f95b407e 100644 --- a/coremltools/models/_compiled_model.py +++ b/coremltools/models/_compiled_model.py @@ -9,7 +9,10 @@ from coremltools import ComputeUnit as _ComputeUnit from coremltools.models.model import MLState as _MLState -from .model import MLModel as _MLModel +from .model import ( + _verify_optimization_hint_input, + MLModel as _MLModel, +) from .utils import _macos_version try: @@ -21,7 +24,12 @@ class CompiledMLModel: @staticmethod - def _init_check(path: str, compute_units: _ComputeUnit, function_name: str): + def _init_check( + path: str, + compute_units: _ComputeUnit, + function_name: str, + optimization_hints: _Optional[dict] = None, + ): if _macos_version() < (10, 13): raise Exception("Loading compiled Core ML models is only support on macOS 10.13 or higher.") if _MLModelProxy is None: @@ -35,11 +43,15 @@ def _init_check(path: str, compute_units: _ComputeUnit, function_name: str): if not isinstance(function_name, str): raise TypeError('The "function_name" parameter must be of type "str".') + _verify_optimization_hint_input(optimization_hints) + + def __init__( self, path: str, compute_units: _ComputeUnit = _ComputeUnit.ALL, function_name: _Optional[str] = None, + optimization_hints: _Optional[dict] = None, ): """ Loads a compiled Core ML model. @@ -59,6 +71,10 @@ def __init__( - ``coremltools.ComputeUnit.CPU_AND_NE``: Use both the CPU and neural engine, but not the GPU. Available only for macOS >= 13.0. + optimization_hints : dict or None + Keys are the names of the optimization hint, either 'reshapeFrequency' or 'specializationStrategy'. + Values are enumeration values of type ``coremltools.ReshapeFrequency`` or ``coremltools.SpecializationStrategy``. + Examples -------- .. 
sourcecode:: python @@ -73,10 +89,24 @@ def __init__( if function_name is None: function_name = "" - self._init_check(path, compute_units, function_name) + self._init_check(path, compute_units, function_name, optimization_hints) + + self.compute_unit = compute_units + self.function_name = function_name + if optimization_hints is not None: + self.optimization_hints = optimization_hints.copy() + else: + self.optimization_hints = None path = _expanduser(path) - self._proxy = _MLModelProxy(path, compute_units.name, function_name) + + if self.optimization_hints is not None: + optimization_hints_str_vals = {k: v.name for k, v in self.optimization_hints.items()} + else: + optimization_hints_str_vals = {} + + self._proxy = _MLModelProxy(path, compute_units.name, function_name, optimization_hints_str_vals) + def predict(self, data, state: _Optional[_MLState] = None): """ @@ -119,6 +149,7 @@ def predict(self, data, state: _Optional[_MLState] = None): self._proxy, _MLModel._update_float16_multiarray_input_to_float32, data, state ) + def make_state(self) -> _MLState: """ Returns a new state object, which can be passed to the ``predict`` method. diff --git a/coremltools/models/model.py b/coremltools/models/model.py index 96c1d2bfa..830d95a9b 100644 --- a/coremltools/models/model.py +++ b/coremltools/models/model.py @@ -15,9 +15,13 @@ import numpy as _np import numpy as _numpy -from coremltools import ComputeUnit as _ComputeUnit -from coremltools import _logger as logger -from coremltools import proto as _proto +from coremltools import ( + ComputeUnit as _ComputeUnit, + _logger as logger, + proto as _proto, + SpecializationStrategy as _SpecializationStrategy, + ReshapeFrequency as _ReshapeFrequency, +) from coremltools._deps import _HAS_TF_1, _HAS_TF_2, _HAS_TORCH from coremltools.converters.mil.mil.program import Program as _Program from coremltools.converters.mil.mil.scope import ScopeSource as _ScopeSource @@ -104,6 +108,30 @@ _METADATA_SOURCE_DIALECT = "com.github.apple.coremltools.source_dialect" +def _verify_optimization_hint_input(optimization_hint_input: _Optional[dict] = None) -> None: + """ + Throws an exception if ``optimization_hint_input`` is not valid. + """ + if optimization_hint_input is None: + return + if not isinstance(optimization_hint_input, dict): + raise TypeError('"optimization_hint_input" must be a dictionary or None') + + if optimization_hint_input != {} and _macos_version() < (15, 0): + raise ValueError('Optimization hints are only available on macOS >= 15.0') + + for k in optimization_hint_input.keys(): + if k not in ('reshapeFrequency', 'specializationStrategy'): + raise ValueError(f"Unrecognized key in optimization_hint dictionary: {k}") + + if "specializationStrategy" in optimization_hint_input and not isinstance(optimization_hint_input["specializationStrategy"], _SpecializationStrategy): + raise TypeError('"specializationStrategy" value of "optimization_hint_input" dictionary must be of type coremltools.SpecializationStrategy') + + if "reshapeFrequency" in optimization_hint_input and not isinstance(optimization_hint_input["reshapeFrequency"], _ReshapeFrequency): + raise TypeError('"reshapeFrequency" value of "optimization_hint_input" dictionary must be of type coremltools.ReshapeFrequency') + + + class _FeatureDescription: def __init__(self, fd_spec): self._fd_spec = fd_spec @@ -222,6 +250,7 @@ def __init__( compute_units=_ComputeUnit.ALL, weights_dir=None, function_name=None, + optimization_hints: _Optional[dict] = None, ): """ Construct an MLModel from an ``.mlmodel``. 
@@ -282,6 +311,10 @@ def __init__( The name of the function from ``model`` to load. If not provided, ``function_name`` will be set to the ``defaultFunctionName`` in the proto. + optimization_hints : dict or None + Keys are the names of the optimization hint, either 'reshapeFrequency' or 'specializationStrategy'. + Values are enumeration values of type ``coremltools.ReshapeFrequency`` or ``coremltools.SpecializationStrategy``. + Notes ----- Internally this maintains the following: @@ -342,8 +375,15 @@ def does_model_contain_mlprogram(model) -> bool: raise ValueError( 'coremltools.ComputeUnit.CPU_AND_NE is only available on macOS >= 13.0' ) + + _verify_optimization_hint_input(optimization_hints) + self.compute_unit = compute_units self.function_name = function_name + if optimization_hints is not None: + self.optimization_hints = optimization_hints.copy() + else: + self.optimization_hints = None self.is_package = False self.is_temp_package = False @@ -361,7 +401,7 @@ def does_model_contain_mlprogram(model) -> bool: self.is_temp_package = is_temp_package self._weights_dir = _try_get_weights_dir_path(model) self.__proxy__, self._spec, self._framework_error = self._get_proxy_and_spec( - model, compute_units, skip_model_load=skip_model_load, + model, compute_units, skip_model_load=skip_model_load, optimization_hints=optimization_hints, ) elif isinstance(model, _proto.Model_pb2.Model): if does_model_contain_mlprogram(model): @@ -381,7 +421,7 @@ def does_model_contain_mlprogram(model) -> bool: _save_spec(model, filename) self.__proxy__, self._spec, self._framework_error = self._get_proxy_and_spec( - filename, compute_units, skip_model_load=skip_model_load, + filename, compute_units, skip_model_load=skip_model_load, optimization_hints=optimization_hints ) try: _os.remove(filename) @@ -415,7 +455,11 @@ def does_model_contain_mlprogram(model) -> bool: self._model_input_names_set = set([i.name for i in f.input]) def _get_proxy_and_spec( - self, filename: str, compute_units: _ComputeUnit, skip_model_load: _Optional[bool] = False + self, + filename: str, + compute_units: _ComputeUnit, + skip_model_load: _Optional[bool] = False, + optimization_hints: _Optional[dict] = None, ): filename = _os.path.expanduser(filename) specification = _load_spec(filename) @@ -430,10 +474,14 @@ def _get_proxy_and_spec( return None, specification, None function_name = "" if self.function_name is None else self.function_name + if optimization_hints is not None: + optimization_hints_str_vals = {k: v.name for k, v in optimization_hints.items()} + else: + optimization_hints_str_vals = {} try: return ( - _MLModelProxy(filename, compute_units.name, function_name), + _MLModelProxy(filename, compute_units.name, function_name, optimization_hints_str_vals), specification, None, ) diff --git a/coremltools/optimize/__init__.py b/coremltools/optimize/__init__.py index ad15d7c90..fb8aba0a3 100644 --- a/coremltools/optimize/__init__.py +++ b/coremltools/optimize/__init__.py @@ -3,9 +3,9 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from coremltools._deps import _HAS_TORCH +from coremltools._deps import _IMPORT_CT_OPTIMIZE_TORCH from . import coreml -if _HAS_TORCH: +if _IMPORT_CT_OPTIMIZE_TORCH: from . 
import torch diff --git a/coremltools/optimize/coreml/_quantization_passes.py b/coremltools/optimize/coreml/_quantization_passes.py index a6b1ec857..5c3b14774 100644 --- a/coremltools/optimize/coreml/_quantization_passes.py +++ b/coremltools/optimize/coreml/_quantization_passes.py @@ -1307,6 +1307,8 @@ def blockwise_compress( """ Compress original_data into n-bit representation by quantization. + mode: "LINEAR_SYMMETRIC" or "LINEAR". + block_sizes: Each element is the block size on corresponding axis for original_data. Returns None if the weight cannot be compressed (for example, the dim size on an axis is not diff --git a/coremltools/optimize/coreml/experimental/_post_training_quantization.py b/coremltools/optimize/coreml/experimental/_post_training_quantization.py index c8925d46e..c48330d09 100644 --- a/coremltools/optimize/coreml/experimental/_post_training_quantization.py +++ b/coremltools/optimize/coreml/experimental/_post_training_quantization.py @@ -4,7 +4,7 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from collections import defaultdict -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union import numpy as np @@ -24,13 +24,17 @@ ) -def linear_quantize_activations(mlmodel: _MLModel, config: _OptimizationConfig, sample_data: List): +def linear_quantize_activations( + mlmodel: _MLModel, + config: _OptimizationConfig, + sample_data: List[Dict[Optional[str], np.ndarray]], +): """ Utility function to convert a float precision MLModel of type ``mlprogram``, which uses float-precision activations, into a compressed MLModel that uses n-bit activations. Currently, only n=8 is suppported. - This is achieved by feeding real sample data into the input MLModel, calibrating the resulting float activation values, + This is achieved by feeding real sample data into the input MLModel, calibrating the resulting float activation values, converting the calibrated values into ``quantize`` and ``dequantize`` op pairs, and inserting those op pairs into the new MLModel instance where activations get quantized. @@ -47,7 +51,9 @@ def linear_quantize_activations(mlmodel: _MLModel, config: _OptimizationConfig, sample_data: List Data used to characterize statistics of the activation values of the original float precision model. - Expects a list of sample input dictionaries. + Expects a list of sample input dictionaries, which should have the same format as the data used in `.predict` + method for the mlmodel. More specifically, the input name need to be specified in the data, unless it's a single + input model where the name will be auto inferred. Returns ------- @@ -77,6 +83,17 @@ def linear_quantize_activations(mlmodel: _MLModel, config: _OptimizationConfig, ) compressed_model_w8a8 = cto.linear_quantize_weights(compressed_model_a8, weight_config) """ + # Validate Sample data. If the sample data name is not provided, try to infer it. + for sample in sample_data: + if None in sample.keys(): + input_spec = mlmodel.get_spec().description.input + if len(sample.keys()) > 1 or len(input_spec) > 1: + raise ValueError( + "When the model has multiple inputs, please provide the name for each data in `sample_data`" + ) + inferred_input_name = input_spec[0].name + sample[inferred_input_name] = sample[None] + del sample[None] ### Apply four major graph passes in order. 
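To illustrate the `sample_data` format that `linear_quantize_activations` expects (the same format as `MLModel.predict` inputs), here is a minimal sketch; the input name `"data"`, the shape, the number of samples, and the pre-existing `mlmodel` and `config` objects are assumptions made purely for illustration.

```python
import numpy as np

# Each list element is one calibration sample, keyed by input name, exactly as it
# would be passed to mlmodel.predict(...).
sample_data = [
    {"data": np.random.rand(1, 64, 10, 10).astype(np.float32)} for _ in range(16)
]

# `mlmodel` (a float mlprogram MLModel) and `config` (an OptimizationConfig) are
# assumed to exist. For a single-input model the input name can be inferred; for a
# multi-input model every input must be named, otherwise a ValueError is raised.
compressed_model_a8 = linear_quantize_activations(mlmodel, config, sample_data)
```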
@@ -220,17 +237,25 @@ def _adjust_concat_surrounding_activation_stats( group_rmin_list, group_rmax_list = [], [] for tensor_name in concat_group: - group_rmin_list.append(activation_stats_dict[tensor_name]["rmin"]) - group_rmax_list.append(activation_stats_dict[tensor_name]["rmax"]) + # Some tensor_name may not have rmin/rmax if the calibration failed before. + if tensor_name in activation_stats_dict: + group_rmin_list.append(activation_stats_dict[tensor_name]["rmin"]) + group_rmax_list.append(activation_stats_dict[tensor_name]["rmax"]) + + if len(group_rmin_list) == 0: + raise ValueError( + "None of the calibration run succeeded. Please check logs about calibrating sample failures." + ) group_rmin, group_rmax = min(group_rmin_list), max(group_rmax_list) for tensor_name in concat_group: - activation_stats_dict[tensor_name]["rmin"] = group_rmin - activation_stats_dict[tensor_name]["rmax"] = group_rmax + if tensor_name in activation_stats_dict: + activation_stats_dict[tensor_name]["rmin"] = group_rmin + activation_stats_dict[tensor_name]["rmax"] = group_rmax def _get_activation_calibration_stats( - fpmodel: _MLModel, sample_data: List + fpmodel: _MLModel, sample_data: List[Dict[str, np.ndarray]] ) -> Dict[str, Dict[str, float]]: """ Calibration and store a dict of intermediate tensor stats. @@ -246,7 +271,6 @@ def _get_activation_calibration_stats( ------- activation_calibration_stats: dict """ - logger.warning( "Running compression pass linear_quantize_activations: start calibrating {} samples".format( len(sample_data) diff --git a/coremltools/optimize/torch/quantization/_backend_config.py b/coremltools/optimize/torch/quantization/_backend_config.py index 80f7b4624..52d3c57fa 100644 --- a/coremltools/optimize/torch/quantization/_backend_config.py +++ b/coremltools/optimize/torch/quantization/_backend_config.py @@ -29,7 +29,7 @@ activation_configs as _activation_configs, ) from coremltools.optimize.torch.quantization._backend_config_utils import ( - binary_op_act_configs as _binary_op_relu_configs, + binary_op_act_configs as _binary_op_act_configs, ) from coremltools.optimize.torch.quantization._backend_config_utils import ( binary_op_configs as _binary_op_configs, @@ -724,7 +724,7 @@ def _add_act() -> _List[_BackendPatternConfig]: FakeQuant -> """ acts = _mod_activations + _func_activations + (_nn.ReLU, _F.relu, _torch.relu) - return _binary_op_relu_configs(ops=[_operator.add, _torch.add], acts=list(acts)) + return _binary_op_act_configs(ops=[_operator.add, _torch.add], acts=list(acts)) @_BackendConfigRegistry.register() @@ -741,7 +741,7 @@ def _mul_act() -> _List[_BackendPatternConfig]: FakeQuant -> """ acts = _mod_activations + _func_activations + (_nn.ReLU, _F.relu, _torch.relu) - return _binary_op_relu_configs(ops=[_operator.mul, _torch.mul], acts=list(acts)) + return _binary_op_act_configs(ops=[_operator.mul, _torch.mul], acts=list(acts)) @_BackendConfigRegistry.register() @@ -758,7 +758,23 @@ def _matmul_act() -> _List[_BackendPatternConfig]: FakeQuant -> """ acts = _mod_activations + _func_activations + (_nn.ReLU, _F.relu, _torch.relu) - return _binary_op_relu_configs(ops=[_torch.matmul], acts=list(acts)) + return _binary_op_act_configs(ops=[_torch.matmul], acts=list(acts)) + + +@_BackendConfigRegistry.register() +def _einsum_act() -> _List[_BackendPatternConfig]: + """ + float: + input_1 -> + einsum -> Act -> output + input_2 -> + qat: + FakeQuant -> + einsum -> Act -> FakeQuant + FakeQuant -> + """ + acts = _mod_activations + _func_activations + (_nn.ReLU, _F.relu, _torch.relu) + 
return _binary_op_act_configs(ops=[_torch.einsum], acts=list(acts)) @_BackendConfigRegistry.register() @@ -809,6 +825,21 @@ def _matmul() -> _List[_BackendPatternConfig]: return _binary_op_configs(ops=[_torch.matmul]) +@_BackendConfigRegistry.register() +def _einsum() -> _List[_BackendPatternConfig]: + """ + float: + input_1 -> + einsum -> output + input_2 -> + qat: + FakeQuant -> + einsum -> FakeQuant + FakeQuant -> + """ + return _binary_op_configs(ops=[_torch.einsum]) + + @_BackendConfigRegistry.register() def _cat() -> _List[_BackendPatternConfig]: """ diff --git a/coremltools/optimize/torch/quantization/_utils.py b/coremltools/optimize/torch/quantization/_utils.py index f283026e9..f0042b448 100644 --- a/coremltools/optimize/torch/quantization/_utils.py +++ b/coremltools/optimize/torch/quantization/_utils.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import math import operator as _operator from collections import defaultdict from enum import Enum as _Enum @@ -205,11 +206,23 @@ def get_quant_range(n_bits: int, dtype: _torch.dtype) -> _Tuple[int, int]: quant_max = max_q / 2 - 1 return int(quant_min), int(quant_max) +def get_n_bits_from_range(quant_min: int, quant_max: int) -> int: + """ + Returns quantization n_bits for given quantization range + """ + n_bits = int(math.log2(quant_max + 1)) + if quant_min < 0: + n_bits += 1 + + return n_bits -def register_compression_metadata(submodule, config): + +def register_compression_metadata(submodule): metadata = _CompressionMetadata("weight") metadata.compression_type = ["quantization"] - metadata.quantization_n_bits = config.weight_n_bits + metadata.quantization_n_bits = get_n_bits_from_range( + submodule.weight_quant_min, submodule.weight_quant_max + ) metadata.quantization_scale = ( submodule.weight_scale.detach().clone().unsqueeze(-1) if submodule.weight_axis == 0 diff --git a/coremltools/optimize/torch/quantization/quantizer.py b/coremltools/optimize/torch/quantization/quantizer.py index 30eab3a8c..726ed5152 100644 --- a/coremltools/optimize/torch/quantization/quantizer.py +++ b/coremltools/optimize/torch/quantization/quantizer.py @@ -283,8 +283,7 @@ def finalize( _register_metadata_version(finalized_model) for name, submodule in finalized_model.named_modules(remove_duplicate=True): if hasattr(submodule, "weight_scale"): - submod_config = self._config.get_module_config(name, submodule) - _register_compression_metadata(submodule, submod_config) + _register_compression_metadata(submodule) if model is None: self._model = finalized_model diff --git a/coremltools/test/api/test_api_visibilities.py b/coremltools/test/api/test_api_visibilities.py index ea8fe22aa..7a3ce26bc 100644 --- a/coremltools/test/api/test_api_visibilities.py +++ b/coremltools/test/api/test_api_visibilities.py @@ -45,6 +45,8 @@ def _check_visible_modules(actual, expected): "libmilstoragepython", "optimize", "StateType", + "ReshapeFrequency", + "SpecializationStrategy", ] diff --git a/coremltools/test/ml_program/test_compression.py b/coremltools/test/ml_program/test_compression.py index 7452c1710..b3989d469 100644 --- a/coremltools/test/ml_program/test_compression.py +++ b/coremltools/test/ml_program/test_compression.py @@ -20,7 +20,9 @@ def get_test_model_and_data( - multi_layer: bool = False, quantize_config: Optional[OpCompressorConfig] = None + multi_layer: bool = False, + quantize_config: Optional[OpCompressorConfig] = None, + use_linear: 
bool = False, ): """ Prepare test model and data. @@ -29,19 +31,24 @@ def get_test_model_and_data( :param quantize_config: If set, the weights in the test model will be nbits quantization-friendly, which means it will be first quantized according to the config, and then dequantized, so the numerical error introduced during the quantization test will be minimum. + :param use_linear: If set, use linear instead of conv in the model. """ if quantize_config is not None and multi_layer: raise AssertionError("Multi-layer model doesn't support pre_quantize_nbits.") inputs = [ct.TensorType(name="data", shape=(1, 64, 10, 10))] + if use_linear: + inputs = [ct.TensorType(name="data", shape=(1, 64))] + torch_input_values = [torch.rand(*i.shape.to_list()) for i in inputs] coreml_input_values = { i.name: val.detach().numpy() for i, val in zip(inputs, torch_input_values) } if multi_layer: - class Model(torch.nn.Module): + + class ConvModel(torch.nn.Module): def __init__(self): - super(Model, self).__init__() + super(ConvModel, self).__init__() self.conv_1 = torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=2) self.conv_2 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=2) @@ -50,9 +57,22 @@ def forward(self, x): conv_2 = self.conv_2(conv_1) return conv_2 - model = Model().eval() + class LinearModel(torch.nn.Module): + def __init__(self): + super(LinearModel, self).__init__() + self.linear_1 = torch.nn.Linear(in_features=64, out_features=32, bias=False) + self.linear_2 = torch.nn.Linear(in_features=32, out_features=16, bias=False) + + def forward(self, x): + linear_1 = self.linear_1(x) + return self.linear_2(linear_1) + + model = LinearModel().eval() if use_linear else ConvModel().eval() else: model = torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=2) + if use_linear: + model = torch.nn.Linear(in_features=64, out_features=32, bias=False) + if quantize_config is not None: # Manually change weight to make it quantization friendly. 
nbits_range_max = 2 ** (quantize_config.nbits - 1) - 1 diff --git a/coremltools/test/modelpackage/test_modelpackage.py b/coremltools/test/modelpackage/test_modelpackage.py index 1618194e8..9d2cab934 100644 --- a/coremltools/test/modelpackage/test_modelpackage.py +++ b/coremltools/test/modelpackage/test_modelpackage.py @@ -3,6 +3,8 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import itertools import json import os import platform @@ -41,7 +43,6 @@ def _remove_path(path): class TestMLModel: def setup_class(self): - spec = Model_pb2.Model() spec.specificationVersion = coremltools.SPECIFICATION_VERSION @@ -459,6 +460,47 @@ def forward(self, x): shutil.rmtree(package_path) + @pytest.mark.skipif(utils._macos_version() < (15, 0), + reason="optimization hints available only on macOS15+") + @pytest.mark.parametrize("reshapeFrequency, specializationStrategy", + itertools.product( + (ct.ReshapeFrequency.Frequent, ct.ReshapeFrequency.Infrequent, None), + (ct.SpecializationStrategy.FastPrediction, ct.SpecializationStrategy.Default, None), + )) + def test_optimization_hints(self, reshapeFrequency, specializationStrategy): + optimization_hints={} + if reshapeFrequency is not None: + optimization_hints['reshapeFrequency'] = reshapeFrequency + if specializationStrategy is not None: + optimization_hints['specializationStrategy'] = specializationStrategy + if len(optimization_hints) == 0: + optimization_hints = None + + m = MLModel(self.spec, optimization_hints=optimization_hints) + assert isinstance(m, MLModel) + assert(m.optimization_hints == optimization_hints) + + + @pytest.mark.skipif(utils._macos_version() < (15, 0), + reason="optimization hints available only on macOS15+") + def test_optimization_hint_error_cases(self): + with pytest.raises(TypeError, match='"optimization_hint_input" must be a dictionary'): + MLModel(self.spec, optimization_hints=12) + + with pytest.raises(ValueError, match='Unrecognized key in optimization_hint dictionary: bad key'): + MLModel(self.spec, optimization_hints={'bad key': ct.ReshapeFrequency.Frequent}) + + with pytest.raises(TypeError, match='"specializationStrategy" value of "optimization_hint_input" dictionary must be of type coremltools.SpecializationStrategy'): + MLModel(self.spec, optimization_hints={"specializationStrategy": 12}) + + with pytest.raises(TypeError, match='"reshapeFrequency" value of "optimization_hint_input" dictionary must be of type coremltools.ReshapeFrequency'): + MLModel(self.spec, optimization_hints={"reshapeFrequency": 12}) + + with pytest.raises(TypeError, match='"reshapeFrequency" value of "optimization_hint_input" dictionary must be of type coremltools.ReshapeFrequency'): + # SpecializationStrategy value for ReshapeFrequency key + MLModel(self.spec, optimization_hints={"reshapeFrequency": ct.SpecializationStrategy.Default}) + + class TestCompiledMLModel: @pytest.mark.skipif(ct.utils._macos_version() < (15, 0), reason="State only supported on macOS 15+") def test_state(self): diff --git a/coremltools/test/neural_network/test_compiled_model.py b/coremltools/test/neural_network/test_compiled_model.py index d2595ed4e..0e53acf3d 100644 --- a/coremltools/test/neural_network/test_compiled_model.py +++ b/coremltools/test/neural_network/test_compiled_model.py @@ -3,12 +3,14 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at 
https://opensource.org/licenses/BSD-3-Clause + +import itertools from shutil import copytree, rmtree from tempfile import TemporaryDirectory import pytest -from coremltools import ComputeUnit +from coremltools import ComputeUnit, ReshapeFrequency, SpecializationStrategy, utils from coremltools.models import CompiledMLModel, MLModel from coremltools.models.utils import compile_model, load_spec, save_spec from coremltools.proto import Model_pb2 @@ -36,6 +38,12 @@ def setup(self): spec.description.predictedFeatureName = 'y' self.spec = spec + self.compiled_model_path = compile_model(self.spec) + + + def teardown_class(self): + rmtree(self.compiled_model_path) + def _test_compile_model_path(self, compiled_model_path, compute_units=ComputeUnit.ALL): try: @@ -114,3 +122,24 @@ def test_save_load_spec(self): my_spec = load_spec(file_path) compiled_model_path = compile_model(my_spec) self._test_compile_model_path(compiled_model_path) + + + @pytest.mark.skipif(utils._macos_version() < (15, 0), + reason="optimization hints available only on macOS15+") + @pytest.mark.parametrize("reshapeFrequency, specializationStrategy", + itertools.product( + (ReshapeFrequency.Frequent, ReshapeFrequency.Infrequent, None), + (SpecializationStrategy.FastPrediction, SpecializationStrategy.Default, None), + )) + def test_optimization_hints(self, reshapeFrequency, specializationStrategy): + optimization_hints={} + if reshapeFrequency is not None: + optimization_hints['reshapeFrequency'] = reshapeFrequency + if specializationStrategy is not None: + optimization_hints["specializationStrategy"] = specializationStrategy + if len(optimization_hints) == 0: + optimization_hints = None + + m = CompiledMLModel(self.compiled_model_path, optimization_hints=optimization_hints) + assert isinstance(m, CompiledMLModel) + assert(m.optimization_hints == optimization_hints) diff --git a/coremltools/test/neural_network/test_tf_numeric.py b/coremltools/test/neural_network/test_tf_numeric.py index e248a98c2..c952cea6f 100644 --- a/coremltools/test/neural_network/test_tf_numeric.py +++ b/coremltools/test/neural_network/test_tf_numeric.py @@ -165,11 +165,6 @@ def test_data_reorganize_cpu_only(self): self.test_data_reorganize(cpu_only=True) def test_depthwise_conv(self, cpu_only=False): - if not cpu_only: - pytest.xfail( - "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" - ) - def get_coreml_model_depthwise(X, params, w): eval = True mlmodel = None diff --git a/coremltools/test/optimize/api/test_optimize_api.py b/coremltools/test/optimize/api/test_optimize_api.py index cb2e1d8b8..1827ab01d 100644 --- a/coremltools/test/optimize/api/test_optimize_api.py +++ b/coremltools/test/optimize/api/test_optimize_api.py @@ -332,6 +332,46 @@ def test_programmatic_example_2(self): output_file = tempfile.NamedTemporaryFile(suffix=".mlpackage").name coreml_model.save(output_file) + def test_quantize_submodule(self): + import torch + from torchvision.models import mobilenet_v3_small + + import coremltools as ct + from coremltools.optimize.torch.quantization import LinearQuantizer + + class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.model1 = mobilenet_v3_small() + self.model2 = mobilenet_v3_small() + + def forward(self, x): + return self.model1(x), self.model2(x) + + model = Model() + data = torch.randn(1, 3, 224, 224) + example_inputs = (data,) + + quantizer = LinearQuantizer(model.model1) + model.model1 = quantizer.prepare(example_inputs=example_inputs) + model(data) + model.model1 = 
quantizer.finalize() + + model = model.eval() + traced_model = torch.jit.trace(model, example_inputs=example_inputs) + coreml_model = ct.convert( + traced_model, + convert_to="mlprogram", + inputs=[ct.TensorType(shape=data.shape)], + minimum_deployment_target=ct.target.iOS18, + skip_model_load=True, + ) + assert coreml_model is not None + quant_ops = coreml_model._mil_program.functions["main"].find_ops( + op_type="constexpr_blockwise_shift_scale" + ) + assert len(quant_ops) > 0 + class TestConvertingCompressedSourceModels: """ diff --git a/coremltools/test/optimize/coreml/test_post_training_quantization.py b/coremltools/test/optimize/coreml/test_post_training_quantization.py index 7b40e66fc..3090106b5 100644 --- a/coremltools/test/optimize/coreml/test_post_training_quantization.py +++ b/coremltools/test/optimize/coreml/test_post_training_quantization.py @@ -1379,7 +1379,12 @@ def test_palettization_pcs(self, compute_unit, backend): )[0] assert types.builtin_to_string(palettize_op.indices.dtype) == "uint4" # The per-channel-scale is represented by a quant op to do scaling. - assert palettize_op.outputs[0].child_ops[0].op_type == "constexpr_blockwise_shift_scale" + quantize_ops = mlmodel_palettized._mil_program.functions["main"].find_ops( + op_type="constexpr_blockwise_shift_scale" + ) + assert len(quantize_ops) > 0 + # Order of quant and lut op is determined by canonicalize_quantized_lut_pattern graph pass. + assert quantize_ops[0].outputs[0].child_ops[0].op_type == "constexpr_lut_to_dense" if _macos_version() >= (15, 0): verify_model_outputs(mlmodel, mlmodel_palettized, coreml_input_values) @@ -1698,7 +1703,7 @@ def test_default_prune_pipeline_ios18(self, compute_unit, backend): assert types.builtin_to_string(sparse_op.shape.dtype) == "uint32" if _macos_version() >= (15, 0): - verify_model_outputs(mlmodel, mlmodel_pruned, coreml_input_values, rtol=2e-3, atol=2e-3) + verify_model_outputs(mlmodel, mlmodel_pruned, coreml_input_values, rtol=3e-3, atol=2e-3) class TestJointCompressWeights: @@ -1938,18 +1943,27 @@ def test_joint_prune_palettize_weights( ) @pytest.mark.parametrize( - "compute_unit, backend, nbits, channel_group_size", + "compute_unit, backend, nbits, channel_group_size, quantize_first", itertools.product( compute_units, backends, (3, 4, 8), (0, 1, 2), + (True, False), ), ) def test_joint_palettize_quantize_weights( - self, compute_unit, backend, nbits, channel_group_size + self, compute_unit, backend, nbits, channel_group_size, quantize_first ): - """First palettize to get fp16 lut, and then quantize the lut to make int8 lut.""" + """ + If quantize_first is True: + First quantize to get int8 weight, and then palettize to n-bit lut with int8 entries. + If quantize_first is False: + First palettize to get fp16 lut, and then quantize the lut to make int8 lut. + + Notice no matter applies which one first, the final output model's op order is guaranteed to be consistent + by the common::canonicalize_quantized_lut_pattern graph pass. 
+ """ model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data_complex() torchmodel = torch.jit.trace(model, torch_input_values) mlmodel = ct.convert( @@ -1982,10 +1996,17 @@ def test_joint_palettize_quantize_weights( ) ) - mlmodel_palettized = cto.coreml.palettize_weights(mlmodel, palettize_config) - mlmodel_joint_palettized_quantized = cto.coreml.linear_quantize_weights( - mlmodel_palettized, quant_config, joint_compression=True - ) + if quantize_first: + mlmodel_quantized = cto.coreml.linear_quantize_weights(mlmodel, quant_config) + mlmodel_joint_palettized_quantized = cto.coreml.palettize_weights( + mlmodel_quantized, palettize_config, joint_compression=True + ) + else: + mlmodel_palettized = cto.coreml.palettize_weights(mlmodel, palettize_config) + mlmodel_joint_palettized_quantized = cto.coreml.linear_quantize_weights( + mlmodel_palettized, quant_config, joint_compression=True + ) + expected_ops = ( ["constexpr_blockwise_shift_scale", "constexpr_lut_to_dense", "conv"] * 2 + ["reshape"] @@ -1995,13 +2016,13 @@ def test_joint_palettize_quantize_weights( ) prog = mlmodel_joint_palettized_quantized._mil_program if channel_group_size == 0: - # When use per-tensor lut, the lut size is too small, so it's stored as ImmediateValue + # When doing lut first with per-tensor lut, the lut size is too small, so it's stored as ImmediateValue # which won't be quantized. ops_in_prog = get_op_types_in_program(prog) - if nbits >= 4: - assert ops_in_prog.count("constexpr_blockwise_shift_scale") >= 6 - else: + if nbits < 4 and not quantize_first: assert ops_in_prog.count("constexpr_blockwise_shift_scale") == 0 + else: + assert ops_in_prog.count("constexpr_blockwise_shift_scale") >= 6 else: assert get_op_types_in_program(prog) == expected_ops @@ -2069,81 +2090,6 @@ def test_joint_palettize_quantize_weights_invalid(self, compute_unit, backend): mlmodel_palettized, quant_config, joint_compression=True ) - @pytest.mark.parametrize( - "compute_unit, backend, nbits, channel_group_size", - itertools.product( - compute_units, - backends, - (3, 4, 8), - (0, 1, 2), - ), - ) - def test_joint_quantize_palettize_weights( - self, compute_unit, backend, nbits, channel_group_size - ): - """First quantize to get int8 weight, and then palettize to n-bit lut with int8 entries.""" - model, inputs, torch_input_values, coreml_input_values = get_test_model_and_data_complex() - torchmodel = torch.jit.trace(model, torch_input_values) - mlmodel = ct.convert( - torchmodel, - inputs=inputs, - convert_to="mlprogram", - minimum_deployment_target=backend.opset_version, - compute_precision=ct.precision.FLOAT16 - if backend.precision == "fp16" - else ct.precision.FLOAT32, - compute_units=compute_unit, - ) - - quant_config = cto.coreml.OptimizationConfig( - global_config=cto.coreml.OpLinearQuantizerConfig( - mode="linear", - dtype="int8", - granularity="per_tensor", - weight_threshold=500, - ) - ) - palettize_config = cto.coreml.OptimizationConfig( - global_config=cto.coreml.OpPalettizerConfig( - mode="uniform", - nbits=nbits, - granularity="per_grouped_channel", - group_size=channel_group_size, - weight_threshold=500, - ) - ) - - mlmodel_quantized = cto.coreml.linear_quantize_weights(mlmodel, quant_config) - mlmodel_joint_quantized_palettized = cto.coreml.palettize_weights( - mlmodel_quantized, palettize_config, joint_compression=True - ) - expected_ops = ( - ["constexpr_lut_to_dense", "constexpr_blockwise_shift_scale", "conv"] * 2 - + ["reshape"] - + ["constexpr_lut_to_dense", "constexpr_blockwise_shift_scale", 
"linear"] * 2 - + ["constexpr_lut_to_dense", "constexpr_blockwise_shift_scale"] * 3 - + ["lstm", "expand_dims", "expand_dims"] - ) - prog = mlmodel_joint_quantized_palettized._mil_program - assert get_op_types_in_program(prog) == expected_ops - - for linear_op in prog.find_ops(op_type="linear"): - assert linear_op.weight.op.op_type == "constexpr_blockwise_shift_scale" - for conv_op in prog.find_ops(op_type="conv"): - assert conv_op.weight.op.op_type == "constexpr_blockwise_shift_scale" - - for palettize_op in prog.find_ops(op_type="constexpr_lut_to_dense"): - assert palettize_op.lut.dtype == types.int8 - assert palettize_op.indices.dtype == types.string_to_builtin(f"uint{nbits}") - assert palettize_op.outputs[0].child_ops[0].op_type == "constexpr_blockwise_shift_scale" - for quantize_op in prog.find_ops(op_type="constexpr_blockwise_shift_scale"): - assert quantize_op.data.dtype == types.int8 - assert quantize_op.scale.dtype == types.fp16 - assert quantize_op.offset.dtype == types.int8 - - if _macos_version() >= (15, 0): - verify_model_outputs(mlmodel, mlmodel_joint_quantized_palettized, coreml_input_values) - @pytest.mark.xfail( reason="rdar://131511244 Investigate Why Joint Prune x Anything are Failing on BNNS" ) diff --git a/coremltools/test/optimize/torch/quantization/test_configure.py b/coremltools/test/optimize/torch/quantization/test_configure.py index 024c89d53..1807dc978 100644 --- a/coremltools/test/optimize/torch/quantization/test_configure.py +++ b/coremltools/test/optimize/torch/quantization/test_configure.py @@ -999,7 +999,10 @@ def test_embedding_layer_quantization(activation_dtype): @pytest.mark.parametrize("config", get_configs_for_qscheme()) @pytest.mark.parametrize("activation_fn", list(_mod_activations) + [nn.ReLU]) -@pytest.mark.parametrize("elementwise_op", [operator.add, torch.add, operator.mul, torch.mul]) +@pytest.mark.parametrize( + "elementwise_op", + [operator.add, torch.add, operator.mul, torch.mul, torch.matmul, torch.einsum], +) @pytest.mark.parametrize("conv_transpose", [False, True]) def test_elementwise_op_act_fusion(config, activation_fn, elementwise_op, conv_transpose): class ElementWiseActModule(torch.nn.Module): @@ -1012,6 +1015,11 @@ def __init__(self, conv_transpose): self.act = activation_fn() def forward(self, x): + if elementwise_op == torch.einsum: + return self.act( + elementwise_op("bkhq,bchk->bchq", x.transpose(1, 3), self.conv1(x)) + ) + return self.act(elementwise_op(x, self.conv1(x))) model = ElementWiseActModule(conv_transpose) diff --git a/coremltools/test/optimize/torch/quantization/test_quantizer.py b/coremltools/test/optimize/torch/quantization/test_quantizer.py index ef67d23a7..b9dcb645c 100644 --- a/coremltools/test/optimize/torch/quantization/test_quantizer.py +++ b/coremltools/test/optimize/torch/quantization/test_quantizer.py @@ -418,7 +418,13 @@ def test_linear_quantizer_report( print("\nREPORT\n" + str(report)) -@pytest.mark.parametrize("dtype", ["qint4", "qint8"]) +@pytest.mark.parametrize( + "dtype", + [ + pytest.param("qint4", marks=pytest.mark.xfail(reason="rdar://134169158")), + "qint8", + ], +) @pytest.mark.parametrize("scheme", ["symmetric", "affine"]) @pytest.mark.parametrize("conv_transpose", [False, True]) def test_compression_metadata(dtype, scheme, conv_transpose): @@ -432,6 +438,7 @@ def test_compression_metadata(dtype, scheme, conv_transpose): "conv1", (nn.Conv2d(1, 20, 3) if not conv_transpose else nn.ConvTranspose2d(1, 20, 3)), ), + ("relu", nn.ReLU()), ("fc1", nn.Linear(20, 100)), ] ) @@ -444,7 +451,7 @@ def 
test_compression_metadata(dtype, scheme, conv_transpose): "quantization_scheme": scheme, }, "fc1": None, - } + }, } ) quantizer = LinearQuantizer(model, config) @@ -457,7 +464,7 @@ def test_compression_metadata(dtype, scheme, conv_transpose): assert "_COREML_/metadata_version" in model.state_dict() # Verify compression metadata is added for conv1 - metadata_dict = CompressionMetadata.from_state_dict(model.conv1.state_dict()) + metadata_dict = CompressionMetadata.from_state_dict(model.conv1[0].state_dict()) assert len(metadata_dict) == 1 assert "weight" in metadata_dict @@ -470,6 +477,6 @@ def test_compression_metadata(dtype, scheme, conv_transpose): if scheme == "symmetric": assert torch.all(metadata.zero_point == 0) - # # Verify no compression metadata is added for fc1 + # Verify no compression metadata is added for fc1 metadata_dict = CompressionMetadata.from_state_dict(model.fc1.state_dict()) assert len(metadata_dict) == 0 diff --git a/coremltools/test/optimize/torch/quantization/test_utils.py b/coremltools/test/optimize/torch/quantization/test_utils.py index 3df9fe42e..45c321d0f 100644 --- a/coremltools/test/optimize/torch/quantization/test_utils.py +++ b/coremltools/test/optimize/torch/quantization/test_utils.py @@ -6,7 +6,7 @@ import pytest import torch -from coremltools.optimize.torch.quantization._utils import get_quant_range +from coremltools.optimize.torch.quantization._utils import get_n_bits_from_range, get_quant_range @pytest.mark.parametrize("n_bits", list(range(2, 8))) @@ -37,3 +37,11 @@ def test_quant_range(dtype, n_bits): else: assert quant_min == signed_expected_values[n_bits][0] assert quant_max == signed_expected_values[n_bits][1] + + +@pytest.mark.parametrize("n_bits", list(range(2, 8))) +@pytest.mark.parametrize("dtype", [torch.quint8, torch.uint8, torch.qint8, torch.int8]) +def test_n_bits_from_range(dtype, n_bits): + quant_min, quant_max = get_quant_range(n_bits, dtype) + output_n_bits = get_n_bits_from_range(quant_min, quant_max) + assert output_n_bits == n_bits diff --git a/coremltools/version.py b/coremltools/version.py index 8ef6b5a8a..9896fb832 100644 --- a/coremltools/version.py +++ b/coremltools/version.py @@ -4,4 +4,4 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -__version__ = "8.0b2" # VERSION_STRING +__version__ = "8.0" # VERSION_STRING diff --git a/docs-guides/source/flexible-inputs.md b/docs-guides/source/flexible-inputs.md index 55eacacf5..3412ee77c 100644 --- a/docs-guides/source/flexible-inputs.md +++ b/docs-guides/source/flexible-inputs.md @@ -162,6 +162,18 @@ You can open the saved ML package in Xcode and click the **Predictions** tab to ![Range shape](images/range_shape.png) +## Reshape Frequency Optimization Hint + +Setting the Reshape Frequency Optimization Hint to `Frequent` can allow flexible shaped models to run on the Neural Engine. 
This option can be set when loading your model:
+
+```python
+model = ct.models.MLModel(
+    'path/to/the/saved/model.mlmodel',
+    optimization_hints={ 'reshapeFrequency': ct.ReshapeFrequency.Frequent }
+)
+```
+
+
 ## Enable Unbounded Ranges
 
 ```{warning}
diff --git a/docs-guides/source/model-prediction.md b/docs-guides/source/model-prediction.md
index fdc2606b6..0127beadf 100644
--- a/docs-guides/source/model-prediction.md
+++ b/docs-guides/source/model-prediction.md
@@ -67,6 +67,17 @@ In previous versions of coremltools, you would restrict execution to the CPU by
 
 For more information and values for this parameter, see [Set the Compute Units](load-and-convert-model.md#set-the-compute-units).
 
+## Fast Predictions
+
+A model can be loaded using the Fast Prediction Optimization Hint. This prioritizes prediction latency at the potential cost of longer specialization time, a larger memory footprint, and increased disk space usage.
+
+```python
+model = ct.models.MLModel(
+    'path/to/the/saved/model.mlmodel',
+    optimization_hints={ 'specializationStrategy': ct.SpecializationStrategy.FastPrediction }
+)
+```
+
 ## Multi-array Prediction
 
 A model that takes a `MultiArray` input requires a NumPy array as an input with the `predict()` call. For example:
diff --git a/reqs/test.pip b/reqs/test.pip
index 4d2b15a56..43b3ebb1e 100644
--- a/reqs/test.pip
+++ b/reqs/test.pip
@@ -31,15 +31,16 @@ gast==0.4.0
 
 # torch 2.3 dropped support for x86 macOS
 torch==2.2.0; platform_machine != "arm64"
-torch==2.3.0; platform_machine == "arm64"
-executorch==0.2.0; platform_machine == "arm64" and python_version >= '3.10' and python_version <= '3.11'
+torch==2.4.0; platform_machine == "arm64"
+executorch==0.3.0; platform_machine == "arm64" and python_version >= '3.10' and python_version <= '3.11'
 torchaudio==2.2.0; platform_machine != "arm64"
-torchaudio==2.3.0; platform_machine == "arm64"
+torchaudio==2.4.0; platform_machine == "arm64"
 torchvision==0.17.0; platform_machine != "arm64"
-torchvision==0.18.0; platform_machine == "arm64"
+torchvision==0.19.0; platform_machine == "arm64"
+torchao==0.4.0; platform_machine == "arm64" and python_version == '3.10'
 
-torchsr==1.0.4; platform_machine == "arm64" and python_version >= '3.10' and python_version <= '3.11'
-timm==0.6.13; platform_machine == "arm64" and python_version >= '3.10' and python_version <= '3.11'
+torchsr==1.0.4; platform_machine == "arm64"
+timm==0.6.13; platform_machine == "arm64"
 xgboost==1.4.2; platform_machine != "arm64"
 mock
 wrapt
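As a companion to the documentation updates above, the following sketch shows that the same `optimization_hints` dictionary is also accepted when loading an already-compiled model through `CompiledMLModel`, mirroring the tests added in this patch; the path is a placeholder and combining both hints is done purely for illustration.

```python
import coremltools as ct

# Placeholder path to a compiled model directory.
compiled_model_path = "path/to/the/compiled/model.mlmodelc"

model = ct.models.CompiledMLModel(
    compiled_model_path,
    compute_units=ct.ComputeUnit.ALL,
    optimization_hints={
        "reshapeFrequency": ct.ReshapeFrequency.Infrequent,
        "specializationStrategy": ct.SpecializationStrategy.FastPrediction,
    },
)
```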