diff --git a/.gitignore b/.gitignore index 1b7780b..6588c27 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ *tmp/ *.pyc -program/mobilenets-armcl-opencl/batches* -program/mobilenets-armcl-opencl/images*.txt +program/mobilenets-armcl-opencl/preprocessed + diff --git a/program/mobilenets-armcl-opencl/.cm/meta.json b/program/mobilenets-armcl-opencl/.cm/meta.json index d2582c0..9adde14 100644 --- a/program/mobilenets-armcl-opencl/.cm/meta.json +++ b/program/mobilenets-armcl-opencl/.cm/meta.json @@ -42,14 +42,16 @@ "compiler_env": "CK_CXX", "compiler_flags_as_env": "$<>$ -Wno-ignored-attributes", "data_name": "mobilenets-armcl-opencl", - "linker_add_lib_as_env": [ - "CK_CXX_EXTRA", - "CK_ENV_LIB_STDCPP_STATIC" - ], + "extra_ld_vars": "$<>$", "main_language": "cpp", "only_for_target_os_tags": [ + "android", "linux" ], + "print_files_after_run": [ + "tmp-stdout.tmp", + "tmp-stderr.tmp" + ], "process_in_tmp": "yes", "program": "yes", "run_cmds": { @@ -58,36 +60,41 @@ "run_time": { "output_invariant_of_repeat": "yes", "fine_grain_timer_file": "tmp-ck-timer.json", - "run_cmd_out1": "tmp-stdout.tmp", - "run_cmd_out2": "tmp-stderr.tmp", "need_compute_device": "opencl", - "pre_process_via_ck": { - "script_name": "preprocess" - }, "post_process_via_ck": "yes", "post_process_cmds": [ - "python $#src_path_local#$postprocess.py" + "python $#ck_take_from_{script:689867d1939a781d}#$postprocess.py" ], + "pre_process_via_ck": { + "module_uoa": "script", + "data_uoa": "689867d1939a781d", + "script_name": "preprocess" + }, "run_cmd_main": "$#BIN_FILE#$", + "run_cmd_out1": "tmp-stdout.tmp", + "run_cmd_out2": "tmp-stderr.tmp", "run_output_files": [ - "tmp-ck-timer.json" + "tmp-ck-timer.json", + "tmp-stdout.tmp", + "tmp-stderr.tmp" ] } } }, "run_vars": { - "CK_IMAGE_FILE": "", - "CK_IMAGE_LIST": "../images", - "CK_BATCH_LIST": "../batches", - "CK_BATCHES_DIR": "../batches", - "CK_RESULTS_DIR": "predictions", "CK_BATCH_COUNT": 1, "CK_BATCH_SIZE": 1, + "CK_CROP_PERCENT": 87.5, + 
"CK_IMAGE_FILE": "", + "CK_RECREATE_CACHE": "NO", "CK_SKIP_IMAGES": 0, - "CK_PREPARE_ALWAYS": "NO" + "CK_NORMALIZE_DATA": "YES", + "CK_SUBTRACT_MEAN": "YES", + "CK_TMP_IMAGE_SIZE": 0 }, "run_deps": { "weights": { + "force_target_as_host": "yes", "local": "yes", "name": "MobileNet weights (as NumPy arrays)", "sort": 10, @@ -95,12 +102,14 @@ "no_tags": "mobilenet-all" }, "imagenet-aux": { + "force_target_as_host": "yes", "local": "yes", "name": "ImageNet dataset (aux)", "sort": 20, "tags": "dataset,imagenet,aux" }, "imagenet-val": { + "force_target_as_host": "yes", "local": "yes", "name": "ImageNet dataset (val)", "sort": 30, @@ -110,7 +119,7 @@ "skip_bin_ext": "yes", "source_files": [ "benchmark.cpp", - "mobilenets.cpp", + "mobilenet.cpp", "$<>$/GraphUtils.cpp", "$<>$/Utils.cpp" ], diff --git a/program/mobilenets-armcl-opencl/README.md b/program/mobilenets-armcl-opencl/README.md index 21e666f..9d02415 100644 --- a/program/mobilenets-armcl-opencl/README.md +++ b/program/mobilenets-armcl-opencl/README.md @@ -1,14 +1,27 @@ -# mobilenet-armcl-opencl +# Classification program for ArmCL ImageNet classification and benchmarking using ArmCL and MobileNet. ## Requirements -ArmCL compiled with Graph API: +### ArmCL library +To build this program, you need ArmCL compiled with Graph API: ``` ck install package:lib-armcl-opencl-18.01 --env.USE_GRAPH=ON --env.USE_NEON=ON --extra_version=-graph ``` +To build this program for Android you need to embed kernels and select target API as follows: +``` +$ ck install package:lib-armcl-opencl-18.05 --env.USE_GRAPH=ON --env.USE_NEON=ON --extra_version=-graph --env.USE_EMBEDDED_KERNELS=ON --env.DEBUG=ON --target_os=android23-arm64 +``` + +**NB:** Use `--target_os=android23-arm64` to build for Android API 23 (v6.0 "Marshmallow") or [similar](https://source.android.com/setup/start/build-numbers). + +We have to embed kernels when building for Android as OpenCL kernel files are not copied to a remote device. 
+ +**TODO:** For some reason only debug version of the library can be used with this program on Android. When we use release version, the program gets stuck at graph preparation stage. + +### Weights package One of MobileNet weights packages: ``` ck install package:weights-mobilenet-v1-1.0-224-npy @@ -32,7 +45,7 @@ ck install package:weights-mobilenet-v1-0.50-128-npy ck install package:weights-mobilenet-v1-0.25-128-npy ``` -ImageNet dataset: +### ImageNet dataset ``` ck install package:imagenet-2012-val ck install package:imagenet-2012-aux @@ -40,10 +53,10 @@ ck install package:imagenet-2012-aux ## Build ``` -ck compile program:mobilenet-armcl-opencl +ck compile program:mobilenet-armcl-opencl [--target_os=android23-arm64] ``` ## Run ``` -ck run program:mobilenet-armcl-opencl +ck run program:mobilenet-armcl-opencl [--target_os=android23-arm64] ``` diff --git a/program/mobilenets-armcl-opencl/armcl_graph_common.h b/program/mobilenets-armcl-opencl/armcl_graph_common.h deleted file mode 100644 index 7bb06fd..0000000 --- a/program/mobilenets-armcl-opencl/armcl_graph_common.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 cTuning foundation. - * See CK COPYRIGHT.txt for copyright details. - * - * SPDX-License-Identifier: BSD-3-Clause. - * See CK LICENSE.txt for licensing details. 
- */ - -#pragma once - -#if defined(ARMCL_18_05_PLUS) -#include -#include -#include -#include -#else -#include -#include -#include -#endif - -#ifndef DATATYPE -#define DATATYPE DataType::F32 -#endif - -inline void printf_callback(const char *buffer, unsigned int len, size_t complete, void *user_data) { - printf("%.*s", len, buffer); -} - -inline void set_kernel_path() { - const char* kernel_path = getenv("CK_ENV_LIB_ARMCL_CL_KERNELS"); - if (kernel_path) { - printf("Kernel path: %s\n", kernel_path); - arm_compute::CLKernelLibrary::get().set_kernel_path(kernel_path); - } -} - -inline void init_armcl(arm_compute::ICLTuner *cl_tuner = nullptr) { - cl_context_properties properties[] = - { - CL_PRINTF_CALLBACK_ARM, reinterpret_cast(printf_callback), - CL_PRINTF_BUFFERSIZE_ARM, static_cast(0x100000), - CL_CONTEXT_PLATFORM, reinterpret_cast(cl::Platform::get()()), - 0 - }; - cl::Context::setDefault(cl::Context(CL_DEVICE_TYPE_DEFAULT, properties)); - arm_compute::CLScheduler::get().default_init(cl_tuner); - - // Should be called after initialization - set_kernel_path(); - -#if defined(ARMCL_18_05_PLUS) - arm_compute::graph::backends::BackendRegistry::get() - .add_backend( - arm_compute::graph::Target::CL); -#endif -} - -#if defined(ARMCL_18_05_PLUS) - -inline arm_compute::graph::ConvolutionMethod str_to_convolution_method(const char *method_name) { - if (!method_name || strlen(method_name) == 0) - return arm_compute::graph::ConvolutionMethod::DEFAULT; - - // Try to get convolution method by its name - if (strcmp(method_name, "DEFAULT") == 0) return arm_compute::graph::ConvolutionMethod::DEFAULT; - if (strcmp(method_name, "GEMM") == 0) return arm_compute::graph::ConvolutionMethod::GEMM; - if (strcmp(method_name, "DIRECT") == 0) return arm_compute::graph::ConvolutionMethod::DIRECT; - if (strcmp(method_name, "WINOGRAD") == 0) return arm_compute::graph::ConvolutionMethod::WINOGRAD; - - // Try to get convolution method as integer value. 
- switch (atoi(method_name)) { - case 0: return arm_compute::graph::ConvolutionMethod::GEMM; - case 1: return arm_compute::graph::ConvolutionMethod::DIRECT; - case 2: return arm_compute::graph::ConvolutionMethod::WINOGRAD; - } - - return arm_compute::graph::ConvolutionMethod::DEFAULT; -} - -inline arm_compute::graph::Target get_target_hint() { - return arm_compute::graph::Target::CL; -} - -#define GRAPH(graph_var, graph_name)\ - arm_compute::graph::frontend::Stream graph_var{ 0, graph_name }; - -#else // ArmCL < 18.05 - -inline arm_compute::graph::ConvolutionMethodHint str_to_convolution_method(const char *method_name) { - if (!method_name || strlen(method_name) == 0) - return arm_compute::graph::ConvolutionMethodHint::GEMM; - - // Try to get convolution method by its name - if (strcmp(method_name, "GEMM") == 0) return arm_compute::graph::ConvolutionMethodHint::GEMM; - if (strcmp(method_name, "DIRECT") == 0) return arm_compute::graph::ConvolutionMethodHint::DIRECT; - - // Try to get convolution method as integer value. 
- switch (atoi(method_name)) { - case 0: return arm_compute::graph::ConvolutionMethodHint::GEMM; - case 1: return arm_compute::graph::ConvolutionMethodHint::DIRECT; - } - - return arm_compute::graph::ConvolutionMethodHint::GEMM; -} - -inline arm_compute::graph::TargetHint get_target_hint() { - return arm_compute::graph::TargetHint::OPENCL; -} - -#define GRAPH(graph_var, graph_name) \ - arm_compute::graph::Graph graph_var; - -#endif // ArmCL < 18.05 - -inline auto get_convolution_method() -> decltype(str_to_convolution_method("")) { - auto method_name = getenv("CK_CONVOLUTION_METHOD"); - if (method_name) - return str_to_convolution_method(method_name); - - if (arm_compute::CLScheduler::get().target() == arm_compute::GPUTarget::BIFROST) - return decltype(str_to_convolution_method(""))::DIRECT; - - return decltype(str_to_convolution_method(""))::GEMM; -} diff --git a/program/mobilenets-armcl-opencl/benchmark.cpp b/program/mobilenets-armcl-opencl/benchmark.cpp index f9eb5f8..8ec84f4 100644 --- a/program/mobilenets-armcl-opencl/benchmark.cpp +++ b/program/mobilenets-armcl-opencl/benchmark.cpp @@ -6,79 +6,82 @@ * See CK LICENSE.txt for licensing details. */ -#include "benchmark.h" +// TODO: these headers should be moved to a common location (where?) 
+#include "../../../ck-tensorflow/program/image-classification-tflite/benchmark.h" +#include "../../../ck-math/program/armcl-classification-mobilenet/armcl_graph_common.h" -void run_mobilenet(); +using namespace std; +using namespace CK; + +void setup_mobilenet(GraphObject& graph, + unsigned int image_size, + float multiplier, + const std::string& weights_dir, + const float *input_data_buffer, + float *output_data_buffer); -void finish_test() { - int batch_count = session().batch_count(); - float total_load_images_time = session().total_load_images_time(); - float total_prediction_time = session().total_prediction_time(); - float avg_load_images_time = total_load_images_time / float(batch_count); - float avg_prediction_time = total_prediction_time / float(batch_count); - float setup_time = xopenme_get_timer(X_TIMER_SETUP); - float test_time = xopenme_get_timer(X_TIMER_TEST); - - cout << "-------------------------------\n"; - cout << "Graph loaded in " << setup_time << " s" << endl; - cout << "All batches loaded in " << total_load_images_time << " s" << endl; - cout << "All batches classified in " << total_prediction_time << " s" << endl; - cout << "Average classification time: " << avg_prediction_time << " s" << endl; - cout << "-------------------------------\n"; - - store_value_f(VAR_TIME_SETUP, "setup_time_s", setup_time); - store_value_f(VAR_TIME_TEST, "test_time_s ", test_time); - store_value_f(VAR_TIME_IMG_LOAD_TOTAL, "images_load_time_s", total_load_images_time); - store_value_f(VAR_TIME_IMG_LOAD_AVG, "images_load_time_avg_s", avg_load_images_time); - store_value_f(VAR_TIME_CLASSIFY_TOTAL, "prediction_time_total_s", total_prediction_time); - store_value_f(VAR_TIME_CLASSIFY_AVG, "prediction_time_avg_s", avg_prediction_time); - - xopenme_dump_state(); - xopenme_finish(); -} -int run_test() { - ofstream err_log("test_errors.log", ios::trunc); +int main(int argc, const char **argv) +{ try { - session().init(); + init_benchmark(); + init_armcl(); + + 
BenchmarkSettings settings; + if (settings.batch_size != 1) + throw runtime_error("Only single image batches are currently supported"); + + int resolution = getenv_i("RUN_OPT_RESOLUTION"); + float multiplier = getenv_f("RUN_OPT_MULTIPLIER"); + + vector input(resolution * resolution * 3); + vector probes(1001); - run_mobilenet(); + BenchmarkSession session(&settings); + Benchmark benchmark(&settings, input.data(), probes.data()); + benchmark.has_background_class = true; + + cout << "\nLoading graph..." << endl; + GRAPH(graph, "MobileNetV1"); + measure_setup([&] + { + setup_mobilenet(graph, resolution, multiplier, settings.graph_file, input.data(), probes.data()); + }); - return EXIT_SUCCESS; + cout << "\nProcessing batches..." << endl; + measure_prediction([&] + { + while (session.get_next_batch()) + { + session.measure_begin(); + benchmark.load_images(session.batch_files()); + session.measure_end_load_images(); + + session.measure_begin(); + graph.run(); + session.measure_end_prediction(); + + benchmark.save_results(session.batch_files()); + } + }); + + finish_benchmark(session); } catch (cl::Error &err) { - ostringstream msg; - msg << "\nERROR: " << err.what() << " (" << err.err() << ")"; - cerr << msg.str() << endl; - err_log << msg.str() << endl; - return EXIT_FAILURE; + cerr << "ERROR: " << err.what() << " (" << err.err() << ")" << endl; + return -1; } catch (std::runtime_error &err) { - ostringstream msg; - msg << "\nERROR: " << err.what() << " " << (errno ? strerror(errno) : ""); - cerr << msg.str() << endl; - err_log << msg.str() << endl; - return EXIT_FAILURE; + cerr << "ERROR: " << err.what() << " " << (errno ? 
strerror(errno) : "") << endl; + return -1; } -} - -int main(int argc, const char **argv) { - xopenme_init(GLOBAL_TIMER_COUNT, GLOBAL_VAR_COUNT); - init_armcl(); - - int status = run_test(); - - if (status == EXIT_SUCCESS) - std::cout << "Test passed\n"; - else - std::cout << "Test failed\n"; - - finish_test(); - fflush(stdout); - fflush(stderr); - - return status; + catch (const string& error_message) + { + cerr << "ERROR: " << error_message << endl; + return -1; + } + return EXIT_SUCCESS; } diff --git a/program/mobilenets-armcl-opencl/benchmark.h b/program/mobilenets-armcl-opencl/benchmark.h deleted file mode 100644 index fad0678..0000000 --- a/program/mobilenets-armcl-opencl/benchmark.h +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2018 cTuning foundation. - * See CK COPYRIGHT.txt for copyright details. - * - * SPDX-License-Identifier: BSD-3-Clause. - * See CK LICENSE.txt for licensing details. - */ - -#ifndef BENCHMARK_H -#define BENCHMARK_H - -#include "armcl_graph_common.h" - -#include - -#include -#include -#include -#include - -enum GLOBAL_TIMER { - X_TIMER_SETUP, - X_TIMER_TEST, - - GLOBAL_TIMER_COUNT -}; - -enum GLOBAL_VAR { - VAR_TIME_SETUP, - VAR_TIME_TEST, - VAR_TIME_IMG_LOAD_TOTAL, - VAR_TIME_IMG_LOAD_AVG, - VAR_TIME_CLASSIFY_TOTAL, - VAR_TIME_CLASSIFY_AVG, - - GLOBAL_VAR_COUNT -}; - -using namespace std; - -inline char path_separator() -{ -#ifdef _WIN32 - return '\\'; -#else - return '/'; -#endif -} - -inline int getenv_i(const char* name, int def) { - return getenv(name) ? atoi(getenv(name)) : def; -} - -inline float getenv_f(const char* name, float def) { - return getenv(name) ? 
atof(getenv(name)) : def; -} - -inline void store_value_f(int index, const char* name, float value) { - char* json_name = new char[strlen(name) + 6]; - sprintf(json_name, "\"%s\":%%f", name); - xopenme_add_var_f(index, json_name, value); - delete[] json_name; -} - -inline int get_batch_size() { - return getenv_i("CK_BATCH_SIZE", 1); -} - -inline int get_batch_count() { - return getenv_i("CK_BATCH_COUNT", 1); -} - -inline const char* get_weights_path() { - return getenv("CK_ENV_MOBILENET"); -} - -inline int get_image_size() { - return getenv_i("CK_ENV_MOBILENET_RESOLUTION", 1); -} - -inline const char* get_labels_file() { - return getenv("CK_CAFFE_IMAGENET_SYNSET_WORDS_TXT"); -} - -inline string get_mode_suffix() { - ostringstream s; - s << "-" << get_image_size() << "-" << get_batch_size() << "-" << get_batch_count() << ".txt"; - return s.str(); -} - -inline string get_images_list() { - return getenv("CK_IMAGE_LIST") + get_mode_suffix(); -} - -inline string get_batches_list() { - return getenv("CK_BATCH_LIST") + get_mode_suffix(); -} - -inline const char* get_result_dir() { - return getenv("CK_RESULTS_DIR"); -} - -inline float get_multiplier() { - return getenv_f("CK_ENV_MOBILENET_MULTIPLIER", 1); -} - -inline bool file_exists(const string& name) { - ifstream f(name); - return f.good(); -} - -class CKPredictionSession { -public: - const vector& image_files() const { return _image_files; } - const vector& batch_files() const { return _batch_files; } - int batch_index() const { return _batch_index; } - size_t batch_size() const { return _batch_size; } - size_t batch_count() const { return _batch_files.size(); } - size_t image_size() const { return _image_size; } - float total_load_images_time() const { return _total_load_images_time; } - float total_prediction_time() const { return _total_prediction_time; } - - void init() { - _batch_index = -1; - _batch_size = get_batch_size(); - _image_size = get_image_size(); - _total_load_images_time = 0; - _total_prediction_time 
= 0; - - load_file_list(); - } - - string get_next_batch_file() { - if (_batch_index+1 >= _batch_files.size()) - return string(); - _batch_index++; - return _batch_files[_batch_index]; - } - - void measure_begin() { - _start_time = chrono::high_resolution_clock::now(); - } - - float measure_end() { - auto finish_time = chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = finish_time - _start_time; - return elapsed.count(); - } - - float measure_end_load_images() { - auto duration = measure_end(); - _total_load_images_time += duration; - return duration; - } - - float measure_end_prediction() { - auto duration = measure_end(); - _total_prediction_time += duration; - return duration; - } - -private: - int _batch_index; - size_t _batch_size; - size_t _image_size; - vector _image_files; - vector _batch_files; - float _total_load_images_time; - float _total_prediction_time; - chrono::time_point _start_time; - - // TODO: Currently each batch consists of a single image, but it's not general case - // and additional work should be done to process real batches - // https://github.com/ARM-software/ComputeLibrary/issues/355 - void load_file_list() { - auto images_list = get_images_list(); - ifstream img_list(images_list); - for (string file_name; !getline(img_list, file_name).fail();) - _image_files.emplace_back(file_name); - cout << "Image list file: " << images_list << endl; - cout << "Image count in file: " << _image_files.size() << endl; - - auto batches_list = get_batches_list(); - ifstream batch_list(batches_list); - for (string file_name; !getline(batch_list, file_name).fail();) - _batch_files.emplace_back(file_name); - cout << "Batch list file: " << batches_list << endl; - cout << "Batch count in file: " << _batch_files.size() << endl; - - if (_batch_size != 1 || _image_files.size() != _batch_files.size()) - throw runtime_error("Only single image batches are currently supported"); - } -}; - -inline CKPredictionSession& session() { - static 
CKPredictionSession s; - return s; -} - -#endif // BENCHMARK_H diff --git a/program/mobilenets-armcl-opencl/mobilenets.cpp b/program/mobilenets-armcl-opencl/mobilenet.cpp similarity index 63% rename from program/mobilenets-armcl-opencl/mobilenets.cpp rename to program/mobilenets-armcl-opencl/mobilenet.cpp index 9007d41..d7d6581 100644 --- a/program/mobilenets-armcl-opencl/mobilenets.cpp +++ b/program/mobilenets-armcl-opencl/mobilenet.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ -#include "benchmark.h" +#include "../../../ck-math/program/armcl-classification-mobilenet/armcl_graph_common.h" #include #include @@ -34,122 +34,50 @@ using namespace arm_compute::graph_utils; using namespace arm_compute::graph::frontend; #endif -class CKNumPyInputLoader : public ITensorAccessor { + +class CKInputAccessor : public ITensorAccessor { public: - CKNumPyInputLoader() {} - CKNumPyInputLoader(CKNumPyInputLoader &&) = default; + CKInputAccessor(const float *buffer): _buffer(buffer) {} + CKInputAccessor(CKInputAccessor &&) = default; bool access_tensor(ITensor &tensor) override { - CKPredictionSession& s = session(); - auto batch_file = s.get_next_batch_file(); - if (batch_file.empty()) - return false; - - cout << endl; - cout << "Batch " << s.batch_index()+1 << " of " << s.batch_count() << endl; - cout << "File: " << batch_file << endl; - - s.measure_begin(); - - copy_int8_numpy_to_tensor(batch_file, tensor); - - auto t = s.measure_end_load_images(); - cout << "Loaded in " << t << " s\n"; - - // Start batch timer after data was loaded - s.measure_begin(); - return true; - } - -private: - void copy_int8_numpy_to_tensor(const string& file_name, ITensor &tensor) { - // Open file - ifstream stream(file_name, ios::in | ios::binary); - if (!stream.good()) - raise_error(file_name, "Unable to open file"); - string header = npy::read_header(stream); - - // Parse header - string typestr; - bool fortran_order = false; - vector shape; - npy::parse_header(header, typestr, fortran_order, shape); - - // 
Check if the typestring matches the given one - if (typestr != arm_compute::utils::get_typestring(DataType::U8)) - raise_error(file_name, "Typestrings mismatch"); - - // Reverse vector in case of non fortran order - if(!fortran_order) - reverse(shape.begin(), shape.end()); - - // Correct dimensions (Needs to match TensorShape dimension corrections) - const TensorShape tensor_shape = tensor.info()->tensor_shape(); - if(shape.size() != tensor_shape.num_dimensions()) - for(int i = static_cast(shape.size()) - 1; i > 0; --i) - if(shape[i] == 1) - shape.pop_back(); - else - break; - - // Validate tensor ranks and shapes - if (shape.size() != tensor_shape.num_dimensions()) - raise_error(file_name, "Tensor ranks mismatch"); - for(size_t i = 0; i < shape.size(); ++i) - if (tensor_shape[i] != shape[i]) - raise_error(file_name, "Tensor dimensions mismatch"); - - // Read data + //const size_t H = tensor.info()->dimension(0); + const size_t W = tensor.info()->dimension(1); + const size_t C = tensor.info()->dimension(2); Window window; + const TensorShape tensor_shape = tensor.info()->tensor_shape(); window.use_tensor_dimensions(tensor_shape); execute_window_loop(window, [&](const Coordinates & id) { - uint8_t value_i8; - stream.read(reinterpret_cast(&value_i8), 1); - float value_f32 = (static_cast(value_i8) / 255.0f - 0.5f) * 2.0f; + const size_t source_offset = (id[1] * W + id[0]) * C + id[2]; auto target_ptr = reinterpret_cast(tensor.ptr_to_element(id)); - *target_ptr = value_f32; + *target_ptr = _buffer[source_offset]; }); + return true; } - void raise_error(const string& file_name, const string& msg) { - ostringstream s; - s << "Failed to read batch file " << file_name << ": " << msg; - throw runtime_error(s.str()); - } +private: + const float *_buffer; }; class CKOutputAccessor : public ITensorAccessor { public: - CKOutputAccessor() {} + CKOutputAccessor(float* buffer): _buffer(buffer) {} CKOutputAccessor(CKOutputAccessor &&) = default; bool access_tensor(ITensor &tensor) 
override { - // Stop batch timer before processing results - CKPredictionSession& s = session(); - auto t = s.measure_end_prediction(); - cout << "Classified in " << t << "s \n"; - - // TODO: some additional work will be required when batch_size > 1 is allowed. - // We will have to split batch result into a set of results for different images. - string img_file = s.image_files()[s.batch_index()]; - string res_dir = get_result_dir(); - string res_file = res_dir + path_separator() + img_file + ".txt"; - ofstream f(res_file); - const size_t num_classes = tensor.info()->dimension(0); - float* probes = reinterpret_cast(tensor.buffer() + tensor.info()->offset_first_element_in_bytes()); - - // Take off the first probe as it references to 'background' class but no such one in ImageNet - for (size_t i = 1; i < num_classes; i++) - f << probes[i] << endl; - + float* probes = reinterpret_cast(tensor.buffer() + tensor.info()->offset_first_element_in_bytes()); + std::copy(probes, probes + num_classes, _buffer); return true; } + +private: + float* _buffer; }; std::string get_convolution_methods_file() { - auto filename = getenv("CK_CONVOLUTION_METHOD_FILE"); + auto filename = getenv("RUN_OPT_CONVOLUTION_METHOD_FILE"); return filename ? 
std::string(filename) : std::string("conv_methods.txt"); } @@ -178,41 +106,22 @@ std::vector load_convolution_methods(TConvolutionMethod defa return methods; } -namespace -{ -inline unique_ptr weights_accessor(const string &file) -{ - const string path = get_weights_path(); - string full_path = path + path_separator() + file; - if (!file_exists(full_path)) - { - cerr << "WARNING: file not found: " << full_path << ", dummy accessor will be used!\n"; - return arm_compute::support::cpp14::make_unique(); - } - return arm_compute::support::cpp14::make_unique(full_path); -} - -inline unique_ptr empty_accessor() { +inline std::unique_ptr empty_accessor() { return std::unique_ptr(nullptr); } -unsigned int apply_multiplier(unsigned int size) { - return static_cast(size * get_multiplier()); -} - -} // namespace - -void run_mobilenet() +void setup_mobilenet(GraphObject& graph, + unsigned int image_size, + float multiplier, + const std::string& weights_dir, + const float *input_data_buffer, + float *output_data_buffer) { - auto target_hint = get_target_hint(); - - - TensorShape input_shape(session().image_size(), - session().image_size(), - 3U, - session().batch_size()); + TensorShape input_shape(image_size, image_size, 3U, 1U); - GRAPH(graph, "MobileNetV1"); + auto weights_accessor = [&](const std::string &file) -> std::unique_ptr { + return arm_compute::support::cpp14::make_unique(weights_dir + '/' + file); + }; auto get_dwsc_node = [&](std::string &¶m_path, unsigned int conv_filt, @@ -236,7 +145,7 @@ void run_mobilenet() 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) << ConvolutionLayer( - 1U, 1U, apply_multiplier(conv_filt), + 1U, 1U, static_cast(conv_filt * multiplier), weights_accessor(param_path + "_pointwise_weights.npy"), empty_accessor(), conv_pad_stride_info) @@ -250,22 +159,22 @@ void run_mobilenet() return BranchLayer(std::move(sg)); }; + auto target_hint = get_target_hint(); auto convolution_method = 
load_convolution_methods(get_convolution_method()); - std::cout << "\nPrepare graph...\n"; - xopenme_clock_start(X_TIMER_SETUP); graph << target_hint + << get_convolution_method() #if defined(ARMCL_18_05_PLUS) << DepthwiseConvolutionMethod::OPTIMIZED_3x3 << InputLayer(TensorDescriptor(input_shape, DATATYPE), - arm_compute::support::cpp14::make_unique()) + arm_compute::support::cpp14::make_unique(input_data_buffer)) #else - << arm_compute::graph::Tensor(TensorInfo(input_shape, 1, DATATYPE), - arm_compute::support::cpp14::make_unique()) + << arm_compute::graph::Tensor(TensorInfo(input_shape, 1, DATATYPE), + arm_compute::support::cpp14::make_unique(input_data_buffer)) #endif << convolution_method[0] << ConvolutionLayer( - 3U, 3U, apply_multiplier(32U), + 3U, 3U, static_cast(32 * multiplier), weights_accessor("Conv2d_0_weights.npy"), empty_accessor(), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)) @@ -299,20 +208,14 @@ void run_mobilenet() << ReshapeLayer(TensorShape(1001U)) << SoftmaxLayer() #if defined(ARMCL_18_05_PLUS) - << OutputLayer(arm_compute::support::cpp14::make_unique()); + << OutputLayer(arm_compute::support::cpp14::make_unique(output_data_buffer)); #else - << arm_compute::graph::Tensor(arm_compute::support::cpp14::make_unique()); + << arm_compute::graph::Tensor(arm_compute::support::cpp14::make_unique(output_data_buffer)); #endif - xopenme_clock_end(X_TIMER_SETUP); #if defined(ARMCL_18_05_PLUS) // Finalize graph GraphConfig config {}; graph.finalize(target_hint, config); #endif - - std::cout << "\nRun graph...\n"; - xopenme_clock_start(X_TIMER_TEST); - graph.run(); - xopenme_clock_end(X_TIMER_TEST); } diff --git a/program/mobilenets-armcl-opencl/postprocess.py b/program/mobilenets-armcl-opencl/postprocess.py deleted file mode 100644 index 6c40197..0000000 --- a/program/mobilenets-armcl-opencl/postprocess.py +++ /dev/null @@ -1,202 +0,0 @@ -# -# Copyright (c) 2018 cTuning foundation. -# See CK COPYRIGHT.txt for copyright details. 
-# -# SPDX-License-Identifier: BSD-3-Clause. -# See CK LICENSE.txt for licensing details. -# - -import os -import json - -TOP1 = 0 -TOP5 = 0 - -def ck_postprocess(i): - print('\n--------------------------------') - def my_env(var): return i['env'][var] - def dep_env(dep, var): return i['deps'][dep]['dict']['env'][var] - - # Init variables from environment - BATCH_COUNT = int(my_env('CK_BATCH_COUNT')) - BATCH_SIZE = int(my_env('CK_BATCH_SIZE')) - IMAGES_COUNT = BATCH_COUNT * BATCH_SIZE - SKIP_IMAGES = int(my_env('CK_SKIP_IMAGES')) - RESULTS_DIR = my_env('CK_RESULTS_DIR') - NUM_CLASSES = 1000 - AUX_DIR = dep_env('imagenet-aux', 'CK_ENV_DATASET_IMAGENET_AUX') - CLASSES_FILE = os.path.join(AUX_DIR, 'synset_words.txt') - VALUES_FILE = os.path.join(AUX_DIR, 'val.txt') - CLASSES_LIST = [] - VALUES_MAP = {} - IMAGE_FILE = my_env('CK_IMAGE_FILE') - - # Single file mode - if IMAGE_FILE: - IMAGES_COUNT = 1 - _, IMAGE_FILE = os.path.split(IMAGE_FILE) - - - def load_ImageNet_classes(): - ''' - Loads ImageNet classes and correct predictions - ''' - classes_list = [] - with open(CLASSES_FILE, 'r') as classes_file: - classes_list = classes_file.read().splitlines() - - values_map = {} - with open(VALUES_FILE, 'r') as values_file: - if IMAGE_FILE: - # Single file mode: try to find this file in values - for line in values_file: - file_name, file_class = line.split() - if file_name == IMAGE_FILE: - values_map[file_name] = int(file_class) - break - else: - # Directory mode: load only required amount of values - for _ in range(SKIP_IMAGES): - values_file.readline().split() - for _ in range(IMAGES_COUNT): - val = values_file.readline().split() - values_map[val[0]] = int(val[1]) - - return classes_list, values_map - - - def get_class_str(class_index): - ''' - Returns printable string for ImageNet specific class - ''' - obj_class = CLASSES_LIST[class_index] - if len(obj_class) > 50: - obj_class = obj_class[:50] + '...' 
- return '(%d) %s' % (class_index, obj_class) - - - def print_predictions(top5, img_file): - ''' - Shows prediction results for image file - top5 - list of pairs (prob, class_index) - ''' - print('---------------------------------------') - if img_file in VALUES_MAP: - class_correct = VALUES_MAP[img_file] - print('%s - %s' % (img_file, get_class_str(class_correct))) - else: - print(img_file) - for prob, class_index in top5: - print('%.2f - %s' % (prob, get_class_str(class_index))) - print('---------------------------------------') - - - def get_top5(all_probs): - ''' - Returns list of pairs (prob, class_index) - ''' - probs_with_classes = [] - for class_index in range(len(all_probs)): - prob = all_probs[class_index] - probs_with_classes.append((prob, class_index)) - sorted_probs = sorted(probs_with_classes, key = lambda pair: pair[0], reverse=True) - return sorted_probs[0:5] - - - def check_predictions(top5, img_file): - ''' - Calculates if prediction was correct for specified image file - top5 - list of pairs (prob, class_index) - ''' - if img_file not in VALUES_MAP: - print('Correctness information is not available') - return {} - - class_correct = VALUES_MAP[img_file] - classes = [c[1] for c in top5] - is_top1 = class_correct == classes[0] - is_top5 = class_correct in classes - if is_top1: - global TOP1 - TOP1 += 1 - if is_top5: - global TOP5 - TOP5 += 1 - res = {} - res['accuracy_top1'] = 'yes' if is_top1 else 'no' - res['accuracy_top5'] = 'yes' if is_top5 else 'no' - res['class_correct'] = class_correct - res['class_topmost'] = classes[0] - res['file_name'] = img_file - return res - - - frame_predictions = [] - - - def calculate_precision(): - print('Process results in {}'.format(RESULTS_DIR)) - - def load_probes(filename): - probes = [] - with open(os.path.join(RESULTS_DIR, filename), 'r') as f: - for line in f: - s = line.strip() - if s: probes.append(float(s)) - return probes - - - for res_file in sorted(os.listdir(RESULTS_DIR)): - # remove trailing suffix 
.txt - img_file = res_file[:-4] - - all_probes = load_probes(res_file) - if len(all_probes) != NUM_CLASSES: - print('WARNING: {} is invalid probes count in file {}, results ignored'.format(len(all_probes), res_file)) - global IMAGES_COUNT - IMAGES_COUNT -= 1 - continue - - top5 = get_top5(all_probes) - print_predictions(top5, img_file) - res = check_predictions(top5, img_file) - frame_predictions.append(res) - - - global TOP1 - global TOP5 - TOP1 = 0 - TOP5 = 0 - CLASSES_LIST, VALUES_MAP = load_ImageNet_classes() - calculate_precision() - - accuracy_top1 = TOP1 / float(IMAGES_COUNT) - accuracy_top5 = TOP5 / float(IMAGES_COUNT) - print('Accuracy top 1: %f (%d of %d)' % (accuracy_top1, TOP1, IMAGES_COUNT)) - print('Accuracy top 5: %f (%d of %d)' % (accuracy_top5, TOP5, IMAGES_COUNT)) - - # Store benchmark results - openme = {} - - # Preserve values stored by program - with open('tmp-ck-timer.json', 'r') as o: - old_values = json.load(o) - for key in old_values['run_time_state']: - openme[key] = old_values['run_time_state'][key] - - openme['accuracy_top1'] = accuracy_top1 - openme['accuracy_top5'] = accuracy_top5 - openme['frame_predictions'] = frame_predictions - - t1=openme.get('setup_time_s',0.0) - t2=openme.get('images_load_time_s',0.0) - t3=openme.get('prediction_time_total_s',0.0) - - openme['execution_time']=t3 # only prediction time (what we are interested in) - openme['execution_time_sum']=t1+t2+t3 # only prediction time (what we are interested in) - - with open('tmp-ck-timer.json', 'w') as o: - json.dump(openme, o, indent=2, sort_keys=True) - - print('--------------------------------\n') - return {'return': 0} diff --git a/program/mobilenets-armcl-opencl/preprocess-next.py b/program/mobilenets-armcl-opencl/preprocess-next.py new file mode 100644 index 0000000..22c64ca --- /dev/null +++ b/program/mobilenets-armcl-opencl/preprocess-next.py @@ -0,0 +1,54 @@ +# +# Copyright (c) 2018 cTuning foundation. +# See CK COPYRIGHT.txt for copyright details. 
+# +# SPDX-License-Identifier: BSD-3-Clause. +# See CK LICENSE.txt for licensing details. +# + +import os + +def ck_preprocess(i): + def dep_env(dep, var): return i['deps'][dep]['dict']['env'].get(var) + + # Setup parameters for program + new_env = {} + files_to_push_by_path = {} + run_input_files = [] + + WEIGHTS_DIR = dep_env('weights', 'CK_ENV_MOBILENET') + CONV_METHOD_FILE = i['env'].get('CK_CONVOLUTION_METHOD_FILE', 'conv_methods.txt') + + if i['target_os_dict'].get('remote','') == 'yes': + if i['env'].get('CK_PUSH_LIBS_TO_REMOTE', 'yes').lower() == 'yes': + lib_dir = dep_env('library', 'CK_ENV_LIB_ARMCL') + lib_name = dep_env('library', 'CK_ENV_LIB_ARMCL_DYNAMIC_CORE_NAME') + files_to_push_by_path['CK_ENV_ARMCL_CORE_LIB_PATH'] = os.path.join(lib_dir, 'lib', lib_name) + run_input_files.append('$<>$') + + if i['env'].get('CK_PUSH_WEIGHTS_TO_REMOTE', 'yes').lower() == 'yes': + file_index = 0 + for file_name in os.listdir(WEIGHTS_DIR): + if file_name.endswith('.npy'): + var_name = 'CK_ENV_WEIGHTS_' + str(file_index) + files_to_push_by_path[var_name] = os.path.join(WEIGHTS_DIR, file_name) + file_index += 1 + + if os.path.isfile(CONV_METHOD_FILE): + run_input_files.append(os.path.join(os.getcwd(), CONV_METHOD_FILE)) + + new_env['RUN_OPT_GRAPH_FILE'] = '.' 
+ else: + new_env['RUN_OPT_GRAPH_FILE'] = WEIGHTS_DIR + + new_env['RUN_OPT_RESOLUTION'] = dep_env('weights', 'CK_ENV_MOBILENET_RESOLUTION') + new_env['RUN_OPT_MULTIPLIER'] = dep_env('weights', 'CK_ENV_MOBILENET_MULTIPLIER') + new_env['RUN_OPT_CONVOLUTION_METHOD_FILE'] = CONV_METHOD_FILE + + print('--------------------------------\n') + return { + 'return': 0, + 'new_env': new_env, + 'run_input_files': run_input_files, + 'files_to_push_by_path': files_to_push_by_path, + } diff --git a/program/mobilenets-armcl-opencl/preprocess.py b/program/mobilenets-armcl-opencl/preprocess.py deleted file mode 100644 index 9c39c52..0000000 --- a/program/mobilenets-armcl-opencl/preprocess.py +++ /dev/null @@ -1,160 +0,0 @@ -# -# Copyright (c) 2018 cTuning foundation. -# See CK COPYRIGHT.txt for copyright details. -# -# SPDX-License-Identifier: BSD-3-Clause. -# See CK LICENSE.txt for licensing details. -# - -import os -import re -import json -import shutil -import numpy as np -import scipy.io -from scipy.ndimage import zoom - -def recreate_dir(d): - if os.path.isdir(d): - shutil.rmtree(d) - os.mkdir(d) - -def ck_preprocess(i): - print('\n--------------------------------') - def my_env(var): return i['env'][var] - def dep_env(dep, var): return i['deps'][dep]['dict']['env'][var] - - # Init variables from environment - BATCH_COUNT = int(my_env('CK_BATCH_COUNT')) - BATCH_SIZE = int(my_env('CK_BATCH_SIZE')) - IMAGES_COUNT = BATCH_COUNT * BATCH_SIZE - SKIP_IMAGES = int(my_env('CK_SKIP_IMAGES')) - IMAGE_DIR = dep_env('imagenet-val', 'CK_ENV_DATASET_IMAGENET_VAL') - IMAGE_SIZE = int(dep_env('weights', 'CK_ENV_MOBILENET_RESOLUTION')) - MODE_SUFFIX = '-{}-{}-{}'.format(IMAGE_SIZE, BATCH_SIZE, BATCH_COUNT) - IMAGE_LIST = my_env('CK_IMAGE_LIST') + MODE_SUFFIX + '.txt' - BATCHES_DIR = my_env('CK_BATCHES_DIR') + MODE_SUFFIX - BATCH_LIST = my_env('CK_BATCH_LIST') + MODE_SUFFIX + '.txt' - RESULTS_DIR = my_env('CK_RESULTS_DIR') - PREPARE_ALWAYS = my_env('CK_PREPARE_ALWAYS') - IMAGE_FILE = 
my_env('CK_IMAGE_FILE') - - # Single file mode - if IMAGE_FILE: - assert os.path.isfile(IMAGE_FILE) - PREPARE_ALWAYS = 'YES' - BATCH_COUNT = 1 - BATCH_SIZE = 1 - IMAGES_COUNT = 1 - SKIP_IMAGES = 0 - IMAGE_DIR, IMAGE_FILE = os.path.split(IMAGE_FILE) - print('Single file mode') - print('Image file: {}'.format(IMAGE_FILE)) - - print('Batch size: {}'.format(BATCH_SIZE)) - print('Batch count: {}'.format(BATCH_COUNT)) - print('Batch list: {}'.format(BATCH_LIST)) - print('Skip images: {}'.format(SKIP_IMAGES)) - print('Image dir: {}'.format(IMAGE_DIR)) - print('Image list: {}'.format(IMAGE_LIST)) - print('Image size: {}'.format(IMAGE_SIZE)) - print('Batches dir: {}'.format(BATCHES_DIR)) - print('Results dir: {}'.format(RESULTS_DIR)) - - - def prepare_batches(): - print('\nPrepare images...') - - # Load processing image filenames - images = [] - if IMAGE_FILE: - # Single file mode - images.append(IMAGE_FILE) - else: - # Directory mode - assert os.path.isdir(IMAGE_DIR), 'Input dir does not exit' - files = [f for f in os.listdir(IMAGE_DIR) if os.path.isfile(os.path.join(IMAGE_DIR, f))] - files = [f for f in files if re.search(r'\.jpg$', f, re.IGNORECASE) - or re.search(r'\.jpeg$', f, re.IGNORECASE)] - assert len(files) > 0, 'Input dir does not contain image files' - files = sorted(files)[SKIP_IMAGES:] - assert len(files) > 0, 'Input dir does not contain more files' - images = files[:IMAGES_COUNT] - if len(images) < IMAGES_COUNT: - for _ in range(IMAGES_COUNT-len(images)): - images.append(images[-1]) - - # Save image list file - assert IMAGE_LIST, 'Image list file name is not set' - with open(IMAGE_LIST, 'w') as f: - for img in images: - f.write('{}\n'.format(img)) - - dst_images = [] - - for img_file in images: - src_img_path = os.path.join(IMAGE_DIR, img_file) - dst_img_path = os.path.join(BATCHES_DIR, img_file) + '.npy' - - img = scipy.misc.imread(src_img_path) - # check if grayscale and convert to RGB - if len(img.shape) == 2: - img = np.dstack((img,img,img)) - # drop 
alpha-channel if present - if img.shape[2] > 3: - img = img[:,:,:3] - - # The same image preprocessing steps are used for MobileNet as for Inception: - # https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/inception_preprocessing.py - - # Crop the central region of the image with an area containing 87.5% of the original image. - new_w = int(img.shape[0] * 0.875) - new_h = int(img.shape[1] * 0.875) - offset_w = (img.shape[0] - new_w)/2 - offset_h = (img.shape[1] - new_h)/2 - img = img[offset_w:new_w+offset_w, offset_h:new_h+offset_h, :] - - # Zoom to target size - zoom_w = float(IMAGE_SIZE)/float(img.shape[0]) - zoom_h = float(IMAGE_SIZE)/float(img.shape[1]) - img = zoom(img, [zoom_w, zoom_h, 1]) - - # Each image is a batch in NCHW format - img = img.transpose(2, 0, 1) - img = np.expand_dims(img, 0) - img = np.ascontiguousarray(img) - - np.save(dst_img_path, img) - dst_images.append(dst_img_path) - - if len(dst_images) % 10 == 0: - print('Prepared images: {} of {}'.format(len(dst_images), len(images))) - - # Save image list file - assert BATCH_LIST, 'Batch list file name is not set' - with open(BATCH_LIST, 'w') as f: - for img in dst_images: - f.write('{}\n'.format(img)) - - # Prepare results directory - recreate_dir(RESULTS_DIR) - - - # Prepare batches or use prepared - do_prepare_batches = True - if PREPARE_ALWAYS != 'YES': - do_prepare_batches = False - - if not do_prepare_batches: - if not os.path.isdir(BATCHES_DIR): - do_prepare_batches = True - - if do_prepare_batches: - recreate_dir(BATCHES_DIR) - prepare_batches() - else: - print('\nBatches preparation is skipped, use previous batches') - - print('--------------------------------\n') - return {'return': 0} - diff --git a/script/mobilenets-armcl-opencl/benchmark.py b/script/mobilenets-armcl-opencl/benchmark.py index 02278ce..dbccdce 100644 --- a/script/mobilenets-armcl-opencl/benchmark.py +++ b/script/mobilenets-armcl-opencl/benchmark.py @@ -176,10 +176,6 @@ def do(i, arg): 'env':{ 
'CK_ENV_DATASET_IMAGENET_VAL':img_dir_val, 'CK_BATCH_COUNT':batch_count, - 'CK_BATCHES_DIR':'../batches', - 'CK_BATCH_LIST':'../batches', - 'CK_IMAGE_LIST':'../images', - 'CK_RESULTS_DIR':'predictions', 'CK_SKIP_IMAGES':0 }, @@ -317,7 +313,7 @@ def do(i, arg): '##choices#env#CK_ENV_MOBILENET_RESOLUTION' ], [ - '##choices#env#CK_ENV_MOBILENET_WIDTH_MULTIPLIER' + '##choices#env#CK_ENV_MOBILENET_MULTIPLIER' ] ], 'choices_selection':[