Push some internal patches (#7)
* Support new APUWareUtils

There is a new version of APUWareUtils in Android 12.

* Support scalar input on PReLU and LeakyReLU

* Support quantized LeakyReLU

* Handle version 2 of TransposeConv

Update the verification to accept version 2 for INT8 TransposeConv.

* Support single-input Reshape

* Fix handling of int8 inputs in TransposeConv

* Fix int8 delegation for activations

* Check Concatenation input type

* Support TransposeConv with bias

* Support the following OPs

- FullyConnected with keep_num_dims == true
- Pack

* Reject Gather with non-constant positions tensor

* Relax the limitation of FullyConnected without bias

* Print unsupported op name instead of op code

Co-authored-by: Code Lin <[email protected]>
freedomtan and Code Lin authored Aug 24, 2021
1 parent a05721a commit 86fc333
Showing 5 changed files with 352 additions and 135 deletions.
50 changes: 26 additions & 24 deletions neuron/APUWareUtilsApi.h
@@ -1,30 +1,29 @@
/*
* Copyright (C) 2021 MediaTek Inc., this file is modified on 02/26/2021
* by MediaTek Inc. based on MIT License.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
* Copyright (C) 2021 MediaTek Inc., this file is modified on 02/26/2021
* by MediaTek Inc. based on MIT License.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#pragma once

#include <android/log.h>
#include <dlfcn.h>
#include <cstdlib>

#include <cstdlib>
#include <memory>
@@ -40,8 +39,6 @@ typedef enum {
  PERFORMANCE_MODE_MAX,
} PERFORMANCE_MODE_E;

#define ABORT_ON_DLOPEN_ERROR

//------------------------------------------------------------------------------
#define APUWARE_LOG_D(format, ...) \
  __android_log_print(ANDROID_LOG_DEBUG, "APUWARELIB", format "\n", \
@@ -86,7 +83,12 @@ inline void* loadApuWareUtilsLibrary(const char* name) {

inline void* getApuWareUtilsLibraryHandle() {
  if (sAPUWareUtilsLibHandle == nullptr) {
    sAPUWareUtilsLibHandle = loadApuWareUtilsLibrary("libapuwareutils.mtk.so");
    sAPUWareUtilsLibHandle =
        loadApuWareUtilsLibrary("libapuwareutils_v2.mtk.so");
    if (sAPUWareUtilsLibHandle == nullptr) {
      sAPUWareUtilsLibHandle =
          loadApuWareUtilsLibrary("libapuwareutils.mtk.so");
    }
  }
  return sAPUWareUtilsLibHandle;
}
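In the updated handler above, the delegate first tries the Android 12 library name libapuwareutils_v2.mtk.so and, only if that fails to load, falls back to the original libapuwareutils.mtk.so. A minimal standalone sketch of the same fallback pattern (the dlopen flags are an assumption; the real loadApuWareUtilsLibrary wrapper also logs failures):

#include <dlfcn.h>

// Prefer the Android 12 v2 library; fall back to the legacy name.
// Returns nullptr when neither library is present on the device.
inline void* LoadApuWareUtilsOrNull() {  // hypothetical helper
  void* handle = dlopen("libapuwareutils_v2.mtk.so", RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr) {
    handle = dlopen("libapuwareutils.mtk.so", RTLD_LAZY | RTLD_LOCAL);
  }
  return handle;
}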
9 changes: 7 additions & 2 deletions neuron/neuron_delegate.cc
@@ -27,9 +27,11 @@
#include "neuron/neuron_delegate_kernel.h"
#include "neuron/neuron_delegate_validation.h"
#include "neuron/neuron_implementation.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/delegates/utils/simple_delegate.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
namespace neuron {
@@ -50,8 +52,11 @@ class NeuronDelegate : public SimpleDelegateInterface {
    std::vector<NeuronValidationFailure> failure;
    bool supported = Validate(registration, node, context, &failure);
    if (!supported) {
      TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, "OP %d is not supported(%s)",
                      registration->builtin_code, failure[0].message.c_str());
      TFLITE_LOG_PROD(
          tflite::TFLITE_LOG_ERROR, "OP %s (v%d) is not supported (%s)",
          tflite::EnumNameBuiltinOperator(
              static_cast<BuiltinOperator>(registration->builtin_code)),
          registration->version, failure[0].message.c_str());
    }
    return supported;
  }
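EnumNameBuiltinOperator comes from the FlatBuffers-generated TFLite schema (hence the new schema_generated.h include) and maps a numeric builtin code to its name, so rejections now log something like "OP FULLY_CONNECTED (v5) is not supported (...)" instead of a bare op code. A hedged sketch of the lookup in isolation:

#include "tensorflow/lite/schema/schema_generated.h"

// Maps a TFLite builtin op code to a readable name,
// e.g. 9 -> "FULLY_CONNECTED". (Hypothetical helper.)
inline const char* OpName(int32_t builtin_code) {
  return tflite::EnumNameBuiltinOperator(
      static_cast<tflite::BuiltinOperator>(builtin_code));
}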
89 changes: 88 additions & 1 deletion neuron/neuron_delegate_builder.h
@@ -43,6 +43,7 @@ enum {
  NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
  NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
  NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
};
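These NN_TENSOR_FLAG_* values are bit flags, so a caller can combine several per tensor; the new NN_TENSOR_FLAG_FORCE_PER_CHANNEL bit is tested in AddTensor further down to force per-channel quantization even for a single-scale tensor. A small sketch of the bitmask convention (helper name hypothetical):

#include <cstdint>

// With the enum above in scope: flags are OR-ed together and tested
// with a bitwise AND, e.g.
//   uint32_t flags = NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED |
//                    NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
inline bool HasForcePerChannel(uint32_t tensor_flags) {
  return (tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL) != 0;
}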

class DequantizeMapping {
@@ -406,6 +407,65 @@ class NeuronOpBuilder {
    return kTfLiteOk;
  }

  // Add a RESHAPE op which reshapes an NNAPI intermediate output to the
  // dimensions of the TFLite output tensor.
  TfLiteStatus AppendReshape(int nn_input_index, int lite_out_tensor_index) {
    augmented_inputs_.push_back(nn_input_index);
    auto& output_tensor = context_->tensors[lite_out_tensor_index];
    TF_LITE_ENSURE_STATUS(
        AddVectorInt32Operand(output_tensor.dims->data,
                              static_cast<uint32_t>(output_tensor.dims->size)));
    TF_LITE_ENSURE_OK(context_,
                      AddTensorOutput(lite_out_tensor_index,
                                      NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
    TF_LITE_ENSURE_STATUS(FinalizeAddOperation(NEURON_RESHAPE));
    return kTfLiteOk;
  }

  // Lower PACK into CONCAT + RESHAPE when possible.
  TfLiteStatus TransformPackIntoSupportedOps(TfLiteNode* node,
                                             TfLiteRegistration* reg) {
    // Add input tensors for CONCAT, and calculate the dimensions for the
    // output.
    int concat_output_ann_index = -1;
    TfLitePackParams* builtin =
        reinterpret_cast<TfLitePackParams*>(node->builtin_data);
    auto& input_tensor = context_->tensors[node->inputs->data[0]];
    int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
                                 : builtin->axis;
    TF_LITE_ENSURE(context_, axis < input_tensor.dims->size);
    uint32_t concat_dim_size = 0;
    for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
      const auto input_index = node->inputs->data[input_pos];
      concat_dim_size +=
          context_->tensors[node->inputs->data[input_pos]].dims->data[axis];
      TF_LITE_ENSURE_STATUS(
          AddTensorInput(input_index, /*hybrid_op=*/false,
                         NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
    }
    TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(axis));
    std::vector<uint32_t> concat_output_shape(input_tensor.dims->size, 0);
    for (int i = 0; i < concat_output_shape.size(); i++) {
      if (i == axis) {
        concat_output_shape[i] = concat_dim_size;
      } else {
        concat_output_shape[i] = input_tensor.dims->data[i];
      }
    }
    TF_LITE_ENSURE_STATUS(AddIntermediateOutputTensor(
        input_tensor.type, concat_output_shape.size(),
        concat_output_shape.data(), input_tensor.params.scale,
        input_tensor.params.zero_point, &concat_output_ann_index));
    TF_LITE_ENSURE_STATUS(FinalizeAddOperation(NEURON_CONCATENATION));

    // Reshape the output tensor
    TF_LITE_ENSURE_STATUS(
        AppendReshape(concat_output_ann_index, node->outputs->data[0]));
    return kTfLiteOk;
  }
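The lowering works because a PACK of N rank-r tensors along axis a is equivalent to a CONCAT along axis a (within rank r) followed by a RESHAPE to the rank-(r+1) output shape: packing four [2, 3] tensors at axis 0 concatenates to [8, 3], then reshapes to [4, 2, 3]. A sketch of just the shape arithmetic (helper name hypothetical):

#include <cstdint>
#include <vector>

// Shape of the CONCAT intermediate for a PACK of num_inputs tensors of
// identical shape input_shape along axis: the axis dimension grows by a
// factor of num_inputs, every other dimension is unchanged.
std::vector<int32_t> ConcatShapeForPack(const std::vector<int32_t>& input_shape,
                                        int num_inputs, int axis) {
  std::vector<int32_t> shape = input_shape;  // e.g. {2, 3}
  shape[axis] *= num_inputs;                 // 4 inputs, axis 0 -> {8, 3}
  return shape;
}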

  // Finish emitting the op (of type `type`) into the Neuron.
  TfLiteStatus FinalizeAddOperation(NeuronOperationType type) {
    // Actually add a Neuron operation
@@ -517,6 +577,31 @@ class NeuronOpBuilder {
    return result;
  }

  TfLiteStatus AddIntermediateOutputTensor(TfLiteType tfl_type,
                                           uint32_t dimension_count,
                                           const uint32_t* dimension_data,
                                           float scale, int32_t zero_point,
                                           int* ann_index_out) {
    int32_t nn_type;
    switch (tfl_type) {
      case kTfLiteFloat32:
        nn_type = NEURON_TENSOR_FLOAT32;
        break;
      case kTfLiteInt8:
        nn_type = NEURON_TENSOR_QUANT8_ASYMM_SIGNED;
        break;
      case kTfLiteUInt8:
        nn_type = NEURON_TENSOR_QUANT8_ASYMM;
        break;
      default:
        return kTfLiteError;
    }
    TF_LITE_ENSURE_STATUS(
        AddAdditionalOutputTensor(dimension_count, dimension_data, nn_type,
                                  scale, zero_point, ann_index_out));
    return kTfLiteOk;
  }

 private:
  // Returns a TF Lite type which has the same memory representation as a
  // provided Neuron type.
@@ -626,6 +711,8 @@ class NeuronOpBuilder {
        tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
    const bool use_int8_asymm_signed =
        tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
    const bool force_per_channel =
        tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
    int neuron_tensor_index =
        operand_mapping_->lite_index_to_neuron(tensor_index);
    if (neuron_tensor_index != -1) {
@@ -684,7 +771,7 @@ class NeuronOpBuilder {
        TfLiteAffineQuantization* quantization_params =
            static_cast<TfLiteAffineQuantization*>(tensor->quantization.params);
        if (quantization_params->scale->size > 1) {
        if (quantization_params->scale->size > 1 || force_per_channel) {
          // Set up per-channel quantization.
          ann_perchannel_params = {
              .channelDim = static_cast<uint32_t>(
