diff --git a/compiler/fme-apply/driver/Driver.cpp b/compiler/fme-apply/driver/Driver.cpp index 76aceff44f9..234fc51eda6 100644 --- a/compiler/fme-apply/driver/Driver.cpp +++ b/compiler/fme-apply/driver/Driver.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/compiler/fme-detect/driver/Driver.cpp b/compiler/fme-detect/driver/Driver.cpp index 63ef0203885..f8e8ab968f5 100644 --- a/compiler/fme-detect/driver/Driver.cpp +++ b/compiler/fme-detect/driver/Driver.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/compiler/luci/export/src/CircleExporterImpl.cpp b/compiler/luci/export/src/CircleExporterImpl.cpp index de26b379cce..b6123796b9c 100644 --- a/compiler/luci/export/src/CircleExporterImpl.cpp +++ b/compiler/luci/export/src/CircleExporterImpl.cpp @@ -159,10 +159,31 @@ void CircleExporterImpl::exportModule(Module *module) // prepare model data prepareModelData(_builder, md); - exportModuleData(module, md); + // if source is extended buffer mode, force export to use extended buffer + md._ext_buffer = module->ext_buffer(); + + if (!exportModuleData(module, md) && md._require_ext_buffer) + { + assert(md._ext_buffer == false); + + // do some cleanups for re-run + _builder.Clear(); + for (size_t g = 0; g < module->size(); ++g) + { + auto graph = module->graph(g); + clearExportInfo(graph); + } + prepareModelData(_builder, md); + + // run again with ext_buffer mode + md._ext_buffer = true; + exportModuleData(module, md); + } + + finalizeWithExtendedBuffer(md); } -void CircleExporterImpl::exportModuleData(Module *module, SerializedModelData &md) +bool CircleExporterImpl::exportModuleData(Module *module, SerializedModelData &md) { std::vector> subgraph_vec; @@ -208,6 +229,13 @@ void CircleExporterImpl::exportModuleData(Module *module, SerializedModelData &m // create array of buffers auto buffers = _builder.CreateVector(md._buffers); + // check current total size exceeds limit + if (check_size_limit(_builder, 0)) + { + md._require_ext_buffer = true; + return false; + } + // This version is taken from comment in fbs constexpr uint32_t version = 0; @@ -215,13 +243,83 @@ void CircleExporterImpl::exportModuleData(Module *module, SerializedModelData &m auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description, buffers, 0 /* metadata_buffer */, metadata); FinishModelBuffer(_builder, model_offset); + + return true; +} + +void CircleExporterImpl::finalizeWithExtendedBuffer(SerializedModelData &md) +{ + _ext_buffer = md._ext_buffer; + if (!_ext_buffer) + return; + + _fb_data_with_ext.clear(); + + auto align16 = [](size_t &v) { + while (v % 16 != 0) + v++; + }; + + // get total memory for flatbuffer + all buffer_data + size_t result_size = _builder.GetSize(); + align16(result_size); + for (auto &it : md._buffer_data_map) + { + SerializedModelData::BufferData &buffer_data = it.second; + result_size += buffer_data.size(); + align16(result_size); + } + align16(result_size); + result_size += 16; // for safety + + std::string result; + const char *buff_ptr = reinterpret_cast(_builder.GetBufferPointer()); + + auto padalign16 = [](std::string &str) { + while (str.size() % 16 != 0) + str += '\0'; + }; + + result.reserve(result_size); + result.append(buff_ptr, _builder.GetSize()); + + auto mutable_model = circle::GetMutableModel(result.data()); + auto mutable_buffers = mutable_model->mutable_buffers(); + + // pad to be 16 bytes aligned + padalign16(result); + for (auto &it : md._buffer_data_map) + { + int32_t buffer_index = it.first; + SerializedModelData::BufferData &buffer_data = it.second; + uint64_t offset = result.size(); + uint64_t size = buffer_data.size(); + + circle::Buffer *mutable_buffer = mutable_buffers->GetMutableObject(buffer_index); + mutable_buffer->mutate_offset(offset); + mutable_buffer->mutate_size(size); + + result.append(buffer_data.begin(), buffer_data.end()); + padalign16(result); + } + padalign16(result); + + // use final result + _fb_data_with_ext = result; } const char *CircleExporterImpl::getBufferPointer() const { + if (_ext_buffer) + return reinterpret_cast(_fb_data_with_ext.data()); return reinterpret_cast(_builder.GetBufferPointer()); } -size_t CircleExporterImpl::getBufferSize() const { return _builder.GetSize(); } +size_t CircleExporterImpl::getBufferSize() const +{ + if (_ext_buffer) + return _fb_data_with_ext.size(); + return _builder.GetSize(); +} } // namespace luci diff --git a/compiler/luci/export/src/CircleExporterImpl.h b/compiler/luci/export/src/CircleExporterImpl.h index 0bcd44885bd..c6a2c199e7e 100644 --- a/compiler/luci/export/src/CircleExporterImpl.h +++ b/compiler/luci/export/src/CircleExporterImpl.h @@ -67,10 +67,17 @@ class CircleExporterImpl /** * @brief implementation that writes Module into internal buffer */ - void exportModuleData(Module *module, SerializedModelData &md); + bool exportModuleData(Module *module, SerializedModelData &md); + + /** + * @brief finalizes file stream with extended buffer from internal buffer + */ + void finalizeWithExtendedBuffer(SerializedModelData &md); private: flatbuffers::FlatBufferBuilder _builder; + bool _ext_buffer = false; + std::string _fb_data_with_ext; }; } // namespace luci diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h index 83b040753dc..5797c364a7a 100644 --- a/compiler/luci/export/src/CircleExporterUtils.h +++ b/compiler/luci/export/src/CircleExporterUtils.h @@ -26,6 +26,9 @@ #include +// limitation of current flatbuffers file size +inline constexpr unsigned int FLATBUFFERS_SIZE_MAX = 2147483648; + namespace luci { @@ -60,6 +63,12 @@ void set_tensor_index(loco::Node *node, const CircleTensorIndex &tensor_id); void clear_tensor_index(loco::Node *node); CircleTensorIndex get_tensor_index(loco::Node *node); +// check if Flatbuffer builder can no longer hold the given amount of the data +inline bool check_size_limit(const flatbuffers::FlatBufferBuilder &fb, const uint64_t data_size) +{ + return data_size > FLATBUFFERS_SIZE_MAX - fb.GetSize(); +} + } // namespace luci #endif // __CIRCLE_EXPORTER_UTILS_H__ diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index 57ae160bd54..87553b3f5ad 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -15,6 +15,7 @@ */ #include "CircleTensorExporter.h" +#include "CircleExporterUtils.h" #include #include @@ -346,7 +347,7 @@ flatbuffers::Offset encodeOpBuffer(FlatBufferBuilder &builder, template flatbuffers::Offset -encodeOpBufferByDType(FlatBufferBuilder &builder, SerializedModelData &, luci::CircleConst *c) +encodeOpBufferByDType(FlatBufferBuilder &builder, SerializedModelData &md, luci::CircleConst *c) { using NativeType = typename loco::DataTypeImpl
::Type; @@ -358,6 +359,26 @@ encodeOpBufferByDType(FlatBufferBuilder &builder, SerializedModelData &, luci::C raw_data.push_back(c->at
(i)); } const size_t raw_size = size * sizeof(NativeType); + + if (md._ext_buffer) + { + // TODO optimize this if this operation takes long or much memory + SerializedModelData::BufferData buffer_data; + buffer_data.resize(raw_size); + std::memcpy(buffer_data.data(), raw_data.data(), raw_size); + + int32_t buffer_index = md._buffers.size(); + md._buffer_data_map.emplace(buffer_index, buffer_data); + + // create fake indicator buffer + return circle::CreateBuffer(builder, 0 /* data */, 1 /* offset */, 1 /* size */); + } + if (check_size_limit(builder, raw_size)) + { + md._require_ext_buffer = true; + return md._empty_buffer; + } + auto array_offset = builder.CreateVector(reinterpret_cast(raw_data.data()), raw_size); return CreateBuffer(builder, array_offset); } @@ -658,14 +679,16 @@ namespace luci void prepareModelData(FlatBufferBuilder &builder, SerializedModelData &md) { + md.clear(); + // add one empty buffer // note: this follows TFLite // note: there's a comment in tflite fbs file // - Note the 0th entry of this array must be an empty buffer (sentinel). // - This is a convention so that tensors without a buffer can provide 0 as // - their buffer. - auto buffer = encodeOpBuffer(builder); - md._buffers.push_back(buffer); + md._empty_buffer = encodeOpBuffer(builder); + md._buffers.push_back(md._empty_buffer); } void exportOpDefinedTensors(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md, diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h index 32fa481ab9d..4339e404951 100644 --- a/compiler/luci/export/src/SerializedData.h +++ b/compiler/luci/export/src/SerializedData.h @@ -123,6 +123,7 @@ struct SerializedModelData final std::unordered_map _operator_codes; std::vector> _buffers; + flatbuffers::Offset _empty_buffer; CircleExportMetadata _metadata; // This is used for removing buffers with same values diff --git a/compiler/luci/import/include/luci/Importer.h b/compiler/luci/import/include/luci/Importer.h index 3111008de51..6efe70165b2 100644 --- a/compiler/luci/import/include/luci/Importer.h +++ b/compiler/luci/import/include/luci/Importer.h @@ -41,9 +41,10 @@ class Importer final // DO NOTHING } -public: - // TODO move to private +private: std::unique_ptr importModule(const circle::Model *model) const; + +public: std::unique_ptr importModule(const uint8_t *data, size_t size); private: diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp index eef4a739013..9d3d0ac4b0d 100644 --- a/compiler/luci/import/src/Importer.cpp +++ b/compiler/luci/import/src/Importer.cpp @@ -42,7 +42,7 @@ namespace { void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &reader, - loco::Graph *graph) + loco::Graph *graph, bool &ext_buffer) { LOGGER(l); @@ -242,6 +242,8 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r auto dtype = luci::luci_datatype(tensor->type()); graph_output->dtype(dtype); } + + ext_buffer = gb_context.ext_buffer(); } class ValidateCollector final : public loco::ErrorListener @@ -277,17 +279,8 @@ std::unique_ptr Importer::importModule(const circle::Model *model) const } CircleReader reader; - if (_file_data && _file_size) - { - if (!reader.parse(model, _file_data, _file_size)) - return nullptr; - } - else - { - // TODO remove this - if (!reader.parse(model)) - return nullptr; - } + if (!reader.parse(model, _file_data, _file_size)) + return nullptr; for (uint32_t g = 0; g < reader.num_subgraph(); ++g) { @@ -299,7 +292,8 @@ std::unique_ptr Importer::importModule(const circle::Model *model) const graph->name(reader.name()); // Convert circle::Model to loco::Graph - convert_graph(*source_ptr, reader, graph.get()); + bool graph_ext_buffer = false; + convert_graph(*source_ptr, reader, graph.get(), graph_ext_buffer); LOGGER(l); VERBOSE(l, 3) << "--- graph dump begin -------------------------------------------"; @@ -310,6 +304,9 @@ std::unique_ptr Importer::importModule(const circle::Model *model) const assert(loco::valid(graph.get(), std::make_unique())); module->add(std::move(graph)); + + if (graph_ext_buffer) + module->ext_buffer(true); } post_import_graph(module.get(), reader); diff --git a/compiler/luci/import/src/ImporterEx.cpp b/compiler/luci/import/src/ImporterEx.cpp index a3cb601d764..9db0465fbc4 100644 --- a/compiler/luci/import/src/ImporterEx.cpp +++ b/compiler/luci/import/src/ImporterEx.cpp @@ -40,22 +40,27 @@ std::unique_ptr ImporterEx::importVerifyModule(const std::string &input_ return nullptr; } - flatbuffers::Verifier verifier{reinterpret_cast(model_data.data()), model_data.size()}; + auto data_data = reinterpret_cast(model_data.data()); + auto data_size = model_data.size(); + + flatbuffers::Verifier verifier{data_data, data_size}; if (!circle::VerifyModelBuffer(verifier)) { std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; return nullptr; } - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; - return nullptr; - } + Importer importer(_source); + return importer.importModule(data_data, data_size); +} + +std::unique_ptr ImporterEx::importModule(std::vector &model_data) const +{ + auto data_data = reinterpret_cast(model_data.data()); + auto data_size = model_data.size(); - Importer importer; - return importer.importModule(circle_model); + Importer importer(_source); + return importer.importModule(data_data, data_size); } std::unique_ptr ImporterEx::importModule(std::vector &model_data) const diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp index 189f4d897f4..fb20fb70666 100644 --- a/compiler/luci/import/src/Nodes/CircleConst.cpp +++ b/compiler/luci/import/src/Nodes/CircleConst.cpp @@ -16,6 +16,8 @@ #include "luci/Import/Nodes/CircleConst.h" +#include "luci/Import/CircleReader.h" + #include #include @@ -27,6 +29,8 @@ #include #include +#include + namespace { @@ -156,8 +160,49 @@ CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index, return nullptr; } - assert(reader->buffers()[const_tensor->buffer()] != nullptr); - const auto buffer = wrap(reader->buffers()[const_tensor->buffer()]->data()); + const auto r_buffers = reader->buffers(); + const auto c_buffer = const_tensor->buffer(); + assert(r_buffers[c_buffer] != nullptr); + const auto r_buffer = r_buffers[c_buffer]; + // temporary buffer to provide raw data from file + // must have life time same or longer than 'buffer' variable + std::vector temp_buffer; + // const auto buffer = wrap(r_buffer->data()); + luci::VectorWrapper buffer(nullptr); + if (r_buffer->offset() > 1) + { + uint32_t r_size = static_cast(r_buffer->size()); + // match binary level to flatbuffers::Vector + temp_buffer.resize(r_size + sizeof(uint32_t)); + + uint8_t *t_data = temp_buffer.data(); + const uint8_t *f_data = reader->file_data(r_buffer->offset()); + if (f_data == nullptr) + { + // NOTE this shouldn't happen + assert(false); + return nullptr; + } + memcpy(t_data, &r_size, sizeof(r_size)); + t_data = t_data + sizeof(r_size); + if (r_buffer->offset() + r_buffer->size() > reader->file_size()) + { + // NOTE this shouldn't happen + assert(false); + return nullptr; + } + memcpy(t_data, f_data, r_buffer->size()); + + using fbv_t = flatbuffers::Vector; + const fbv_t *v_data = reinterpret_cast(temp_buffer.data()); + buffer = wrap(v_data); + + context->ext_buffer(true); + } + else + { + buffer = wrap(r_buffer->data()); + } const auto const_dims = wrap(const_tensor->shape()); // in NHWC if (const_dims.size() == 0 && buffer.empty()) { diff --git a/compiler/luci/tester/src/ReadModule.cpp b/compiler/luci/tester/src/ReadModule.cpp index 87c1233f07f..2c0d5fe8fd7 100644 --- a/compiler/luci/tester/src/ReadModule.cpp +++ b/compiler/luci/tester/src/ReadModule.cpp @@ -31,15 +31,15 @@ std::unique_ptr ReadModule(std::string &input_path) // Load model from the file foder::FileLoader file_loader{input_path}; std::vector model_data = file_loader.load(); - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) + + auto *data_data = reinterpret_cast(model_data.data()); + luci::Importer importer; + auto module = importer.importModule(data_data, model_data.size()); + if (module == nullptr) { std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; return nullptr; } - - luci::Importer importer; - auto module = importer.importModule(circle_model); assert(module->size() > 0); for (size_t g = 0; g < module->size(); ++g) diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst index d5debbdb01b..7359deabd72 100644 --- a/compiler/luci/tests/test.lst +++ b/compiler/luci/tests/test.lst @@ -35,6 +35,7 @@ addread(Conv2D_000) addread(Conv2D_001) addread(Conv2D_002) addread(Conv2D_003) +addread(Conv2D_006) addread(Conv2D_U8_000) addread(Conv2D_U8_001) addread(Cos_000) @@ -266,6 +267,7 @@ addwrite(Conv2D_000) addwrite(Conv2D_001) addwrite(Conv2D_002) addwrite(Conv2D_003) +addwrite(Conv2D_006) addwrite(Conv2D_U8_000) addwrite(Conv2D_U8_001) addwrite(Cos_000) diff --git a/res/TensorFlowLiteRecipes/Conv2D_006/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_006/test.recipe new file mode 100644 index 00000000000..d4da00b9026 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Conv2D_006/test.recipe @@ -0,0 +1,52 @@ +# test to store as buffer data to outside of flatbuffer + +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 4 dim: 3 dim: 2 } +} +operand { + name: "ker" + type: FLOAT32 + shape { dim: 2 dim: 2 dim: 2 dim: 2 } + filler { + tag: "explicit" + arg: "1" arg: "2" arg: "-3" arg: "-4" + arg: "-5" arg: "6" arg: "-7" arg: "8" + arg: "4" arg: "-2" arg: "3" arg: "-1" + arg: "-8" arg: "-6" arg: "7" arg: "5" + } +} +operand { + name: "bias" + type: FLOAT32 + shape { dim: 2 } + filler { + tag: "explicit" + arg: "1" + arg: "2" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 2 dim: 2 dim: 2 } +} +operation { + type: "Conv2D" + conv2d_options { + padding: VALID + stride_w: 1 + stride_h: 2 + dilation_w_factor: 1 + dilation_h_factor: 1 + activation: RELU + } + input: "ifm" + input: "ker" + input: "bias" + output: "ofm" +} +input: "ifm" +output: "ofm" +ext_offset: true