-
Notifications
You must be signed in to change notification settings - Fork 158
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[onert-micro] Replace common kernels impl to pal/common
This pr replaces common kernels impl to pal/common dir. ONE-DCO-1.0-Signed-off-by: Artem Balyshev <[email protected]>
- Loading branch information
Artem Balyshev
committed
Aug 4, 2023
1 parent
2e8ff58
commit bfdd5fc
Showing
18 changed files
with
1,391 additions
and
1,084 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* | ||
* Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved | ||
* Copyright 2017 The TensorFlow Authors. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef LUCI_INTERPRETER_PAL_ADD_COMMON_H | ||
#define LUCI_INTERPRETER_PAL_ADD_COMMON_H | ||
|
||
#include "Params.h" | ||
#include "PALUtils.h" | ||
#include "ProcessBroadcastShapes.h" | ||
|
||
namespace luci_interpreter_pal | ||
{ | ||
|
||
// TODO: check if there real activation value | ||
template <typename T> | ||
inline void Add(const ArithmeticParams ¶ms, const int flat_size, const T *input1_data, | ||
const T *input2_data, T *output_data) | ||
{ | ||
T activation_min, activation_max; | ||
getActivationParams(params, &activation_min, &activation_max); | ||
|
||
for (int i = 0; i < flat_size; ++i) | ||
output_data[i] = | ||
std::min(std::max(input1_data[i] + input2_data[i], activation_min), activation_max); | ||
} | ||
|
||
template <typename T> | ||
inline void | ||
BroadcastAdd4DSlow(const ArithmeticParams ¶ms, | ||
const luci_interpreter::RuntimeShape &input1_shape, const T *input1_data, | ||
const luci_interpreter::RuntimeShape &input2_shape, const T *input2_data, | ||
const luci_interpreter::RuntimeShape &output_shape, T *output_data) | ||
{ | ||
NdArrayDesc<4> desc1; | ||
NdArrayDesc<4> desc2; | ||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); | ||
const luci_interpreter::RuntimeShape extended_output_shape = | ||
luci_interpreter::RuntimeShape::extendedShape(4, output_shape); | ||
|
||
T activation_min, activation_max; | ||
getActivationParams(params, &activation_min, &activation_max); | ||
|
||
// In Tensorflow, the dimensions are canonically named (batch_number, row, | ||
// col, channel), with extents (batches, height, width, depth), with the | ||
// trailing dimension changing most rapidly (channels has the smallest stride, | ||
// typically 1 element). | ||
// | ||
// In generated C code, we store arrays with the dimensions reversed. The | ||
// first dimension has smallest stride. | ||
// | ||
// We name our variables by their Tensorflow convention, but generate C code | ||
// nesting loops such that the innermost loop has the smallest stride for the | ||
// best cache behavior. | ||
for (int b = 0; b < extended_output_shape.dims(0); ++b) | ||
{ | ||
for (int y = 0; y < extended_output_shape.dims(1); ++y) | ||
{ | ||
for (int x = 0; x < extended_output_shape.dims(2); ++x) | ||
{ | ||
for (int c = 0; c < extended_output_shape.dims(3); ++c) | ||
{ | ||
const int output_data_offset = | ||
((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) * | ||
extended_output_shape.dims(3) + | ||
c; | ||
|
||
output_data[output_data_offset] = | ||
std::min(std::max(input1_data[subscriptToIndex(desc1, b, y, x, c)] + | ||
input2_data[subscriptToIndex(desc2, b, y, x, c)], | ||
activation_min), | ||
activation_max); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
} // namespace luci_interpreter_pal | ||
|
||
#endif // LUCI_INTERPRETER_PAL_ADD_COMMON_H |
93 changes: 93 additions & 0 deletions
93
onert-micro/luci-interpreter/pal/common/PALAveragePool2DCommon.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved | ||
* Copyright 2020 The TensorFlow Authors. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_COMMON_H | ||
#define LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_COMMON_H | ||
|
||
#include "Params.h" | ||
#include "PALUtils.h" | ||
|
||
namespace luci_interpreter_pal | ||
{ | ||
|
||
// TODO: reduce code duplication with MaxPool
/**
 * @brief Float 2-D average pooling over a 4-D tensor indexed as (batch, y, x, channel).
 *
 * For each output position the pooling window is clamped to the input bounds, the input values
 * inside the clamped window are averaged (padding cells are not counted), and the average is
 * clamped to [params.float_activation_min, params.float_activation_max].
 *
 * @param params        pooling parameters: strides, padding, filter size and activation range
 * @param input_shape   input shape; dims(0..3) are read as batches, height, width, depth
 * @param input_data    input values
 * @param output_shape  output shape; dims(1..3) are read as height, width, depth
 * @param output_data   destination buffer
 */
inline void AveragePool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
                        const float *input_data, const luci_interpreter::RuntimeShape &output_shape,
                        float *output_data)
{
  const int batches = input_shape.dims(0);
  const int depth = output_shape.dims(3);
  const int input_height = input_shape.dims(1);
  const int input_width = input_shape.dims(2);
  const int input_depth = input_shape.dims(3);
  const int output_height = output_shape.dims(1);
  const int output_width = output_shape.dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch)
  {
    for (int out_y = 0; out_y < output_height; ++out_y)
    {
      // The vertical window placement depends only on out_y.
      const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
      // Compute the boundaries of the filter region clamped so as to
      // ensure that the filter window fits in the input array.
      const int filter_y_start = std::max(0, -in_y_origin);
      const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);

      for (int out_x = 0; out_x < output_width; ++out_x)
      {
        // The horizontal window placement depends only on out_x.
        const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
        const int filter_x_start = std::max(0, -in_x_origin);
        const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);

        for (int channel = 0; channel < depth; ++channel)
        {
          float total = 0.f;
          float filter_count = 0;

          for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
          {
            for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
            {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;

              const int input_data_offset =
                ((batch * input_height + in_y) * input_width + in_x) * input_depth + channel;

              total += input_data[input_data_offset];
              filter_count++;
            }
          }
          const int output_data_offset =
            ((batch * output_height + out_y) * output_width + out_x) * depth + channel;

          // An empty window would make the average undefined.
          assert(filter_count != 0);
          const float average = total / filter_count;

          output_data[output_data_offset] =
            std::min(std::max(average, params.float_activation_min), params.float_activation_max);
        }
      }
    }
  }
}
} // namespace luci_interpreter_pal | ||
|
||
#endif // LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_COMMON_H |
201 changes: 201 additions & 0 deletions
201
onert-micro/luci-interpreter/pal/common/PALConv2DCommon.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
/* | ||
* Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved | ||
* Copyright 2017 The TensorFlow Authors. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef LUCI_INTERPRETER_PAL_CONV2D_COMMON_H | ||
#define LUCI_INTERPRETER_PAL_CONV2D_COMMON_H | ||
#include "Params.h" | ||
#include "PALUtils.h" | ||
|
||
namespace luci_interpreter_pal | ||
{ | ||
static inline void Conv(const ConvParams ¶ms, const int32_t *input_shape, | ||
const float *input_data, const int32_t *filter_shape, | ||
const float *filter_data, const float *bias_data, | ||
const int32_t *output_shape, float *output_data) | ||
{ | ||
const int stride_width = params.stride_width; | ||
const int stride_height = params.stride_height; | ||
const int dilation_width_factor = params.dilation_width_factor; | ||
const int dilation_height_factor = params.dilation_height_factor; | ||
const int pad_width = params.padding_values.width; | ||
const int pad_height = params.padding_values.height; | ||
const float output_activation_min = params.float_activation_min; | ||
const float output_activation_max = params.float_activation_max; | ||
|
||
const auto batches = input_shape[0]; | ||
const int input_height = input_shape[1]; | ||
const int input_width = input_shape[2]; | ||
const int input_depth = input_shape[3]; | ||
const int output_depth = filter_shape[0]; | ||
const int filter_height = filter_shape[1]; | ||
const int filter_width = filter_shape[2]; | ||
const int output_height = output_shape[1]; | ||
const int output_width = output_shape[2]; | ||
for (int batch = 0; batch < batches; ++batch) | ||
{ | ||
for (int out_y = 0; out_y < output_height; ++out_y) | ||
{ | ||
const int in_y_origin = (out_y * stride_height) - pad_height; | ||
for (int out_x = 0; out_x < output_width; ++out_x) | ||
{ | ||
const int in_x_origin = (out_x * stride_width) - pad_width; | ||
for (int out_channel = 0; out_channel < output_depth; ++out_channel) | ||
{ | ||
float total = 0.f; | ||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) | ||
{ | ||
const int in_y = in_y_origin + dilation_height_factor * filter_y; | ||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) | ||
{ | ||
const int in_x = in_x_origin + dilation_width_factor * filter_x; | ||
|
||
// Zero padding by omitting the areas outside the image. | ||
const bool is_point_inside_image = | ||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); | ||
|
||
if (!is_point_inside_image) | ||
{ | ||
continue; | ||
} | ||
|
||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) | ||
{ | ||
const int input_data_offset = | ||
((batch * input_height + in_y) * input_width + in_x) * input_depth + in_channel; | ||
|
||
const int filter_data_offset = | ||
((out_channel * filter_height + filter_y) * filter_width + filter_x) * | ||
input_depth + | ||
in_channel; | ||
|
||
const float input_value = input_data[input_data_offset]; | ||
const float filter_value = filter_data[filter_data_offset]; | ||
total += (input_value * filter_value); | ||
} | ||
} | ||
} | ||
// float bias_value = 0.0f; | ||
if (bias_data) | ||
{ | ||
total += bias_data[out_channel]; | ||
} | ||
|
||
const int output_data_offset = | ||
((batch * output_height + out_y) * output_width + out_x) * output_depth + out_channel; | ||
|
||
output_data[output_data_offset] = | ||
std::min(std::max(total, output_activation_min), output_activation_max); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
static inline void Conv(const ConvParams ¶ms, const int32_t *input_shape, | ||
const uint8_t *input_data, const int32_t *filter_shape, | ||
const uint8_t *filter_data, const int32_t *bias_data, | ||
const int32_t *output_shape, uint8_t *output_data) | ||
{ | ||
const int stride_width = params.stride_width; | ||
const int stride_height = params.stride_height; | ||
const int dilation_width_factor = params.dilation_width_factor; | ||
const int dilation_height_factor = params.dilation_height_factor; | ||
const int pad_width = params.padding_values.width; | ||
const int pad_height = params.padding_values.height; | ||
const int32_t input_offset = params.input_offset; | ||
const int32_t filter_offset = params.weights_offset; | ||
const int32_t output_offset = params.output_offset; | ||
const int32_t output_multiplier = params.output_multiplier; | ||
const int output_shift = params.output_shift; | ||
const int32_t output_activation_min = params.quantized_activation_min; | ||
const int32_t output_activation_max = params.quantized_activation_max; | ||
|
||
const auto batches = input_shape[0]; | ||
const int input_height = input_shape[1]; | ||
const int input_width = input_shape[2]; | ||
const int input_depth = input_shape[3]; | ||
const int output_depth = filter_shape[0]; | ||
const int filter_height = filter_shape[1]; | ||
const int filter_width = filter_shape[2]; | ||
const int output_height = output_shape[1]; | ||
const int output_width = output_shape[2]; | ||
|
||
for (int batch = 0; batch < batches; ++batch) | ||
{ | ||
for (int out_y = 0; out_y < output_height; ++out_y) | ||
{ | ||
const int in_y_origin = (out_y * stride_height) - pad_height; | ||
for (int out_x = 0; out_x < output_width; ++out_x) | ||
{ | ||
const int in_x_origin = (out_x * stride_width) - pad_width; | ||
for (int out_channel = 0; out_channel < output_depth; ++out_channel) | ||
{ | ||
int32_t acc = 0; | ||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) | ||
{ | ||
const int in_y = in_y_origin + dilation_height_factor * filter_y; | ||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) | ||
{ | ||
const int in_x = in_x_origin + dilation_width_factor * filter_x; | ||
|
||
// Zero padding by omitting the areas outside the image. | ||
const bool is_point_inside_image = | ||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); | ||
|
||
if (!is_point_inside_image) | ||
{ | ||
continue; | ||
} | ||
|
||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) | ||
{ | ||
const int input_data_offset = | ||
((batch * input_height + in_y) * input_width + in_x) * input_depth + in_channel; | ||
|
||
const int filter_data_offset = | ||
((out_channel * filter_height + filter_y) * filter_width + filter_x) * | ||
input_depth + | ||
in_channel; | ||
|
||
const int32_t input_val = input_data[input_data_offset]; | ||
const int32_t filter_val = filter_data[filter_data_offset]; | ||
acc += (filter_val + filter_offset) * (input_val + input_offset); | ||
} | ||
} | ||
} | ||
if (bias_data) | ||
{ | ||
acc += bias_data[out_channel]; | ||
} | ||
acc = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); | ||
acc += output_offset; | ||
acc = std::max(acc, output_activation_min); | ||
acc = std::min(acc, output_activation_max); | ||
|
||
const int output_data_offset = | ||
((batch * output_height + out_y) * output_width + out_x) * output_depth + out_channel; | ||
|
||
output_data[output_data_offset] = static_cast<uint8_t>(acc); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
} // namespace luci_interpreter_pal | ||
|
||
#endif // LUCI_INTERPRETER_PAL_CONV2D_COMMON_H |
Oops, something went wrong.