From 4c951e1b5f2e2e5ea15d6c768a98a09067177b46 Mon Sep 17 00:00:00 2001 From: cqy123456 Date: Mon, 19 Feb 2024 05:02:07 -0500 Subject: [PATCH] diskann support new data type(fp16/bf16) Signed-off-by: cqy123456 --- include/knowhere/operands.h | 2 + src/index/diskann/diskann.cc | 5 +- thirdparty/DiskANN/include/diskann/distance.h | 3 +- thirdparty/DiskANN/include/diskann/index.h | 2 +- .../DiskANN/include/diskann/pq_flash_index.h | 2 +- thirdparty/DiskANN/include/diskann/utils.h | 55 ++++++++---- thirdparty/DiskANN/src/aux_utils.cpp | 89 ++++++++++++++----- thirdparty/DiskANN/src/distance.cpp | 17 ++-- thirdparty/DiskANN/src/index.cpp | 81 ++++++++++++++--- thirdparty/DiskANN/src/partition_and_pq.cpp | 62 +++++++++++-- thirdparty/DiskANN/src/pq_flash_index.cpp | 27 +++--- 11 files changed, 263 insertions(+), 82 deletions(-) diff --git a/include/knowhere/operands.h b/include/knowhere/operands.h index e8ad14ed6..91034603a 100644 --- a/include/knowhere/operands.h +++ b/include/knowhere/operands.h @@ -139,6 +139,8 @@ template using TypeMatch = std::bool_constant<(... | std::is_same_v)>; template using KnowhereDataTypeCheck = TypeMatch; +template +using KnowhereFloatTypeCheck = TypeMatch; template struct MockData { diff --git a/src/index/diskann/diskann.cc b/src/index/diskann/diskann.cc index 171038b30..ef0919f9c 100644 --- a/src/index/diskann/diskann.cc +++ b/src/index/diskann/diskann.cc @@ -32,7 +32,8 @@ namespace knowhere { template class DiskANNIndexNode : public IndexNode { - static_assert(std::is_same_v, "DiskANN only support float"); + static_assert(KnowhereFloatTypeCheck::value, + "DiskANN only support floating point data type(float32, float16, bfloat16)"); public: using DistType = float; @@ -697,4 +698,6 @@ DiskANNIndexNode::GetCachedNodeNum(const float cache_dram_budget, cons } KNOWHERE_SIMPLE_REGISTER_GLOBAL(DISKANN, DiskANNIndexNode, fp32); +KNOWHERE_SIMPLE_REGISTER_GLOBAL(DISKANN, DiskANNIndexNode, fp16); +KNOWHERE_SIMPLE_REGISTER_GLOBAL(DISKANN, DiskANNIndexNode, bf16); } // namespace knowhere diff --git a/thirdparty/DiskANN/include/diskann/distance.h b/thirdparty/DiskANN/include/diskann/distance.h index bf462eabd..a31ee2b90 100644 --- a/thirdparty/DiskANN/include/diskann/distance.h +++ b/thirdparty/DiskANN/include/diskann/distance.h @@ -2,10 +2,11 @@ #include #include "simd/hook.h" #include "diskann/utils.h" +#include "knowhere/operands.h" namespace diskann { template - using DISTFUN = T (*)(const T *, const T *, size_t); + using DISTFUN = std::function; template DISTFUN get_distance_function(Metric m); diff --git a/thirdparty/DiskANN/include/diskann/index.h b/thirdparty/DiskANN/include/diskann/index.h index 28de6bfc1..c56e13a96 100644 --- a/thirdparty/DiskANN/include/diskann/index.h +++ b/thirdparty/DiskANN/include/diskann/index.h @@ -351,7 +351,7 @@ namespace diskann { size_t _num_frozen_pts = 0; bool _has_built = false; DISTFUN _func = nullptr; - std::function _distance; + DISTFUN _distance; unsigned _width = 0; unsigned _ep = 0; size_t _max_range_of_loaded_graph = 0; diff --git a/thirdparty/DiskANN/include/diskann/pq_flash_index.h b/thirdparty/DiskANN/include/diskann/pq_flash_index.h index 299c76050..a982be2f2 100644 --- a/thirdparty/DiskANN/include/diskann/pq_flash_index.h +++ b/thirdparty/DiskANN/include/diskann/pq_flash_index.h @@ -233,7 +233,7 @@ namespace diskann { DISTFUN dist_cmp; DISTFUN dist_cmp_float; - T dist_cmp_wrap(const T *x, const T *y, size_t d, int32_t u) { + float dist_cmp_wrap(const T *x, const T *y, size_t d, int32_t u) { if (metric == Metric::COSINE) { return dist_cmp(x, y, d) / base_norms[u]; } else { diff --git a/thirdparty/DiskANN/include/diskann/utils.h b/thirdparty/DiskANN/include/diskann/utils.h index 282ea84a7..144b3f20d 100644 --- a/thirdparty/DiskANN/include/diskann/utils.h +++ b/thirdparty/DiskANN/include/diskann/utils.h @@ -36,6 +36,7 @@ typedef int FileHandle; #include "ann_exception.h" #include "common_includes.h" #include "knowhere/comp/thread_pool.h" +#include "knowhere/operands.h" // taken from // https://github.com/Microsoft/BLAS-on-flash/blob/master/include/utils.h @@ -61,7 +62,7 @@ typedef int FileHandle; #define COMPLETION_PERCENT 10 inline bool file_exists(const std::string& name, bool dirCheck = false) { - int val; + int val; struct stat buffer; val = stat(name.c_str(), &buffer); @@ -537,7 +538,8 @@ namespace diskann { for (size_t i = 0; i < npts; i++) { reader.read((char*) (data + i * rounded_dim), dim * sizeof(T)); - memset(data + i * rounded_dim + dim, 0, (rounded_dim - dim) * sizeof(T)); + memset((void*) (data + i * rounded_dim + dim), 0, + (rounded_dim - dim) * sizeof(T)); } stream << " done." << std::endl; LOG_KNOWHERE_DEBUG_ << stream.str(); @@ -583,6 +585,13 @@ namespace diskann { template float prepare_base_for_inner_products(const std::string in_file, const std::string out_file) { + if (!knowhere::KnowhereFloatTypeCheck::value) { + std::stringstream stream; + stream << "DiskANN currently only supports floating point(float32, " + "float16, bfloat16) for IP." + << std::endl; + throw diskann::ANNException(stream.str(), -1); + } LOG_KNOWHERE_DEBUG_ << "Pre-processing base file by adding extra coordinate"; std::ifstream in_reader(in_file.c_str(), std::ios::binary); @@ -606,10 +615,11 @@ namespace diskann { size_t block_size = npts <= BLOCK_SIZE ? npts : BLOCK_SIZE; std::unique_ptr in_block_data = std::make_unique(block_size * in_dims); - std::unique_ptr out_block_data = - std::make_unique(block_size * out_dims); + std::unique_ptr out_block_data = + std::make_unique(block_size * out_dims); - std::memset(out_block_data.get(), 0, sizeof(float) * block_size * out_dims); + std::memset((void*) out_block_data.get(), 0, + sizeof(T) * block_size * out_dims); _u64 num_blocks = DIV_ROUND_UP(npts, block_size); std::vector norms(npts, 0); @@ -642,14 +652,14 @@ namespace diskann { for (_u64 p = 0; p < block_pts; p++) { for (_u64 j = 0; j < in_dims; j++) { out_block_data[p * out_dims + j] = - in_block_data[p * in_dims + j] / max_norm; + (T) (((float) in_block_data[p * in_dims + j]) / max_norm); } float res = 1 - (norms[start_id + p] / (max_norm * max_norm)); res = res <= 0 ? 0 : std::sqrt(res); - out_block_data[p * out_dims + out_dims - 1] = res; + out_block_data[p * out_dims + out_dims - 1] = (T) res; } out_writer.write((char*) out_block_data.get(), - block_pts * out_dims * sizeof(float)); + block_pts * out_dims * sizeof(T)); } out_writer.close(); return max_norm; @@ -657,9 +667,15 @@ namespace diskann { template std::vector prepare_base_for_cosine(const std::string in_file, - const std::string out_file) { - LOG_KNOWHERE_DEBUG_ - << "Pre-processing base file by normalizing"; + const std::string out_file) { + if (!knowhere::KnowhereFloatTypeCheck::value) { + std::stringstream stream; + stream << "DiskANN currently only supports floating point(float32, " + "float16, bfloat16) for Cosine." + << std::endl; + throw diskann::ANNException(stream.str(), -1); + } + LOG_KNOWHERE_DEBUG_ << "Pre-processing base file by normalizing"; std::ifstream in_reader(in_file.c_str(), std::ios::binary); std::ofstream out_writer(out_file.c_str(), std::ios::binary); _u64 npts, in_dims, out_dims; @@ -680,10 +696,11 @@ namespace diskann { size_t block_size = npts <= BLOCK_SIZE ? npts : BLOCK_SIZE; std::unique_ptr in_block_data = std::make_unique(block_size * in_dims); - std::unique_ptr out_block_data = - std::make_unique(block_size * out_dims); + std::unique_ptr out_block_data = + std::make_unique(block_size * out_dims); - std::memset(out_block_data.get(), 0, sizeof(float) * block_size * out_dims); + std::memset((void*) out_block_data.get(), 0, + sizeof(T) * block_size * out_dims); _u64 num_blocks = DIV_ROUND_UP(npts, block_size); std::vector norms(npts, 0); @@ -716,11 +733,12 @@ namespace diskann { for (_u64 p = 0; p < block_pts; p++) { for (_u64 j = 0; j < in_dims; j++) { out_block_data[p * out_dims + j] = - in_block_data[p * in_dims + j] / norms[start_id + p]; + (T) (((float) in_block_data[p * in_dims + j]) / + norms[start_id + p]); } } out_writer.write((char*) out_block_data.get(), - block_pts * out_dims * sizeof(float)); + block_pts * out_dims * sizeof(T)); } out_writer.close(); @@ -805,7 +823,8 @@ namespace diskann { for (size_t i = 0; i < npts; i++) { reader.read((char*) (data + i * rounded_dim), dim * sizeof(T)); - memset(data + i * rounded_dim + dim, 0, (rounded_dim - dim) * sizeof(T)); + memset((void*) (data + i * rounded_dim + dim), 0, + (rounded_dim - dim) * sizeof(T)); } } @@ -837,7 +856,7 @@ namespace diskann { float* read_buf, _u64 npts, _u64 ndims); void normalize_data_file(const std::string& inFileName, - const std::string& outFileName); + const std::string& outFileName); inline std::string get_pq_pivots_filename(const std::string& prefix) { return prefix + "_pq_pivots.bin"; diff --git a/thirdparty/DiskANN/src/aux_utils.cpp b/thirdparty/DiskANN/src/aux_utils.cpp index 04488e56b..bc2b995ad 100644 --- a/thirdparty/DiskANN/src/aux_utils.cpp +++ b/thirdparty/DiskANN/src/aux_utils.cpp @@ -227,7 +227,7 @@ namespace diskann { diskann::alloc_aligned(((void **) &warmup), warmup_num * warmup_aligned_dim * sizeof(T), 8 * sizeof(T)); - std::memset(warmup, 0, warmup_num * warmup_aligned_dim * sizeof(T)); + std::memset((void*) warmup, 0, warmup_num * warmup_aligned_dim * sizeof(T)); std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<> dis(-128, 127); @@ -760,7 +760,6 @@ namespace diskann { } save_bin(cache_file, node_list.data(), num_nodes_to_cache, 1); - } // General purpose support for DiskANN interface @@ -1064,7 +1063,7 @@ namespace diskann { template int build_disk_index(const BuildConfig &config) { - if (!std::is_same::value && + if (!knowhere::KnowhereFloatTypeCheck::value && (config.compare_metric == diskann::Metric::INNER_PRODUCT || config.compare_metric == diskann::Metric::COSINE)) { std::stringstream stream; @@ -1305,6 +1304,12 @@ namespace diskann { const std::string mem_index_file, const std::string output_file, const std::string reorder_data_file); + template void create_disk_layout( + const std::string base_file, const std::string mem_index_file, + const std::string output_file, const std::string reorder_data_file); + template void create_disk_layout( + const std::string base_file, const std::string mem_index_file, + const std::string output_file, const std::string reorder_data_file); template int8_t *load_warmup(const std::string &cache_warmup_file, uint64_t &warmup_num, @@ -1317,6 +1322,12 @@ namespace diskann { template float *load_warmup(const std::string &cache_warmup_file, uint64_t &warmup_num, uint64_t warmup_dim, uint64_t warmup_aligned_dim); + template knowhere::fp16 *load_warmup( + const std::string &cache_warmup_file, uint64_t &warmup_num, + uint64_t warmup_dim, uint64_t warmup_aligned_dim); + template knowhere::bf16 *load_warmup( + const std::string &cache_warmup_file, uint64_t &warmup_num, + uint64_t warmup_dim, uint64_t warmup_aligned_dim); template uint32_t optimize_beamwidth( std::unique_ptr> &pFlashIndex, @@ -1333,35 +1344,53 @@ namespace diskann { float *tuning_sample, _u64 tuning_sample_num, _u64 tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, uint32_t start_bw); + template uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, + knowhere::fp16 *tuning_sample, _u64 tuning_sample_num, + _u64 tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, + uint32_t start_bw); + template uint32_t optimize_beamwidth( + std::unique_ptr> &pFlashIndex, + knowhere::bf16 *tuning_sample, _u64 tuning_sample_num, + _u64 tuning_sample_aligned_dim, uint32_t L, uint32_t nthreads, + uint32_t start_bw); template int build_disk_index(const BuildConfig &config); template int build_disk_index(const BuildConfig &config); template int build_disk_index(const BuildConfig &config); + template int build_disk_index(const BuildConfig &config); + template int build_disk_index(const BuildConfig &config); template std::unique_ptr> - build_merged_vamana_index(std::string base_file, bool ip_prepared, - diskann::Metric compareMetric, unsigned L, - unsigned R, bool accelerate_build, bool shuffle_build, - double sampling_rate, double ram_budget, - std::string mem_index_path, - std::string medoids_path, - std::string centroids_file); + build_merged_vamana_index( + std::string base_file, bool ip_prepared, diskann::Metric compareMetric, + unsigned L, unsigned R, bool accelerate_build, bool shuffle_build, + double sampling_rate, double ram_budget, std::string mem_index_path, + std::string medoids_path, std::string centroids_file); template std::unique_ptr> - build_merged_vamana_index(std::string base_file, bool ip_prepared, - diskann::Metric compareMetric, unsigned L, - unsigned R, bool accelerate_build, bool shuffle_build, - double sampling_rate, double ram_budget, - std::string mem_index_path, - std::string medoids_path, - std::string centroids_file); + build_merged_vamana_index( + std::string base_file, bool ip_prepared, diskann::Metric compareMetric, + unsigned L, unsigned R, bool accelerate_build, bool shuffle_build, + double sampling_rate, double ram_budget, std::string mem_index_path, + std::string medoids_path, std::string centroids_file); template std::unique_ptr> - build_merged_vamana_index(std::string base_file, bool ip_prepared, - diskann::Metric compareMetric, unsigned L, - unsigned R, bool accelerate_build, bool shuffle_build, - double sampling_rate, double ram_budget, - std::string mem_index_path, - std::string medoids_path, - std::string centroids_file); + build_merged_vamana_index( + std::string base_file, bool ip_prepared, diskann::Metric compareMetric, + unsigned L, unsigned R, bool accelerate_build, bool shuffle_build, + double sampling_rate, double ram_budget, std::string mem_index_path, + std::string medoids_path, std::string centroids_file); + template std::unique_ptr> + build_merged_vamana_index( + std::string base_file, bool ip_prepared, diskann::Metric compareMetric, + unsigned L, unsigned R, bool accelerate_build, bool shuffle_build, + double sampling_rate, double ram_budget, std::string mem_index_path, + std::string medoids_path, std::string centroids_file); + template std::unique_ptr> + build_merged_vamana_index( + std::string base_file, bool ip_prepared, diskann::Metric compareMetric, + unsigned L, unsigned R, bool accelerate_build, bool shuffle_build, + double sampling_rate, double ram_budget, std::string mem_index_path, + std::string medoids_path, std::string centroids_file); template void generate_cache_list_from_graph_with_pq( _u64 num_nodes_to_cache, unsigned R, const diskann::Metric compare_metric, @@ -1381,4 +1410,16 @@ namespace diskann { const std::string &pq_compressed_code_path, const unsigned entry_point, const std::vector> &graph, const std::string &cache_file); + template void generate_cache_list_from_graph_with_pq( + _u64 num_nodes_to_cache, unsigned R, const diskann::Metric compare_metric, + const std::string &sample_file, const std::string &pq_pivots_path, + const std::string &pq_compressed_code_path, const unsigned entry_point, + const std::vector> &graph, + const std::string &cache_file); + template void generate_cache_list_from_graph_with_pq( + _u64 num_nodes_to_cache, unsigned R, const diskann::Metric compare_metric, + const std::string &sample_file, const std::string &pq_pivots_path, + const std::string &pq_compressed_code_path, const unsigned entry_point, + const std::vector> &graph, + const std::string &cache_file); }; // namespace diskann diff --git a/thirdparty/DiskANN/src/distance.cpp b/thirdparty/DiskANN/src/distance.cpp index 32fcbd844..2f150d72c 100644 --- a/thirdparty/DiskANN/src/distance.cpp +++ b/thirdparty/DiskANN/src/distance.cpp @@ -5,15 +5,16 @@ namespace diskann { template DISTFUN get_distance_function(diskann::Metric m) { if (m == diskann::Metric::L2) { - return [](const T* x, const T* y, size_t size) -> T { + return [](const T* x, const T* y, size_t size) -> float { float res = 0; for (size_t i = 0; i < size; i++) { res += ((float) x[i] - (float) y[i]) * ((float) x[i] - (float) y[i]); } return res; }; - } else if (m == diskann::Metric::INNER_PRODUCT) { - return [](const T* x, const T* y, size_t size) -> T { + } else if (m == diskann::Metric::INNER_PRODUCT || + m == diskann::Metric::COSINE) { + return [](const T* x, const T* y, size_t size) -> float { float res = 0; for (size_t i = 0; i < size; i++) { res += (float) x[i] * (float) y[i]; @@ -65,11 +66,15 @@ namespace diskann { } } - template DISTFUN get_distance_function(diskann::Metric m); - template DISTFUN get_distance_function(diskann::Metric m); - template DISTFUN get_distance_function(diskann::Metric m); + template DISTFUN get_distance_function(diskann::Metric m); + template DISTFUN get_distance_function(diskann::Metric m); + template DISTFUN get_distance_function(diskann::Metric m); + template DISTFUN get_distance_function(diskann::Metric m); + template DISTFUN get_distance_function(diskann::Metric m); template float norm_l2sqr(const float*, size_t); template float norm_l2sqr(const uint8_t*, size_t); template float norm_l2sqr(const int8_t*, size_t); + template float norm_l2sqr(const knowhere::fp16*, size_t); + template float norm_l2sqr(const knowhere::bf16*, size_t); } // namespace diskann diff --git a/thirdparty/DiskANN/src/index.cpp b/thirdparty/DiskANN/src/index.cpp index 5c994f00f..e0b3f9919 100644 --- a/thirdparty/DiskANN/src/index.cpp +++ b/thirdparty/DiskANN/src/index.cpp @@ -80,7 +80,7 @@ namespace diskann { auto aligned_dim = ROUND_UP(dim, 8); size_t allocSize = aligned_dim * sizeof(T); alloc_aligned(((void **) &aligned_query), allocSize, 8 * sizeof(T)); - memset(aligned_query, 0, aligned_dim * sizeof(T)); + memset((void *) aligned_query, 0, aligned_dim * sizeof(T)); auto l_to_use = diskann_max(search_l, indexing_l); @@ -267,7 +267,7 @@ namespace diskann { alloc_aligned(((void **) &_data), (_max_points + _num_frozen_pts) * _aligned_dim * sizeof(T), 8 * sizeof(T)); - std::memset(_data, 0, + std::memset((void *) _data, 0, (_max_points + _num_frozen_pts) * _aligned_dim * sizeof(T)); _ep = (unsigned) _max_points; @@ -286,7 +286,7 @@ namespace diskann { this->_func = get_distance_function(m); if (ip_prepared) { _padding_id = _dim - 1; - this->_distance = [this](const T* x, const T* y, size_t n) -> T { + this->_distance = [this](const T* x, const T* y, size_t n) -> float { auto ret = _func(x, y, n); return ret + 2*x[_padding_id]*y[_padding_id]; }; @@ -345,7 +345,7 @@ namespace diskann { template void Index::clear_index() { - memset(_data, 0, + memset((void*) _data, 0, _aligned_dim * (_max_points + _num_frozen_pts) * sizeof(T)); _nd = 0; for (size_t i = 0; i < _final_graph.size(); i++) @@ -364,7 +364,7 @@ namespace diskann { return 0; } size_t tag_bytes_written; - auto tag_data = std::make_unique(_nd + _num_frozen_pts); + auto tag_data = std::make_unique(_nd + _num_frozen_pts); for (_u32 i = 0; i < _nd; i++) { if (_location_to_tag.find(i) != _location_to_tag.end()) { tag_data[i] = _location_to_tag[i]; @@ -593,7 +593,7 @@ namespace diskann { size_t tags_file_num_pts = 0, graph_num_pts = 0, data_file_num_pts = 0; if (!_save_as_one_file) { -// For DLVS Store, we will not support saving the index in multiple files. + // For DLVS Store, we will not support saving the index in multiple files. std::string data_file = std::string(filename) + ".data"; std::string tags_file = std::string(filename) + ".tags"; std::string delete_set_file = std::string(filename) + ".del"; @@ -655,7 +655,6 @@ namespace diskann { _change_lock.unlock(); } - template size_t Index::load_graph(std::string filename, size_t expected_num_points) { @@ -2030,7 +2029,7 @@ namespace diskann { T *ret_data = nullptr; size_t allocSize = ((size_t) _nd) * _aligned_dim * sizeof(T); alloc_aligned(((void **) &ret_data), allocSize, 8 * sizeof(T)); - memset(ret_data, 0, allocSize); + memset((void*) ret_data, 0, allocSize); memcpy(ret_data, _data, allocSize); return ret_data; } @@ -2053,7 +2052,8 @@ namespace diskann { return 0; if (_nd == 0) { - memset(_data + (_max_points) *_aligned_dim, 0, _aligned_dim * sizeof(T)); + memset((void *) (_data + (_max_points) *_aligned_dim), 0, + _aligned_dim * sizeof(T)); return 1; } size_t res = calculate_entry_point(); @@ -2422,7 +2422,7 @@ namespace diskann { memcpy((void *) (_data + _aligned_dim * _nd), _data + (size_t) _aligned_dim * _max_points, sizeof(T) * _dim); - memset((_data + (size_t) _aligned_dim * _max_points), 0, + memset((void *) (_data + (size_t) _aligned_dim * _max_points), 0, sizeof(T) * _aligned_dim); } } @@ -2649,7 +2649,7 @@ namespace diskann { memcpy((void *) (_data + (size_t) _aligned_dim * new_location), _data + (size_t) _aligned_dim * old_location, sizeof(T) * _aligned_dim); - memset((_data + (size_t) _aligned_dim * old_location), 0, + memset((void *) (_data + (size_t) _aligned_dim * old_location), 0, sizeof(T) * _aligned_dim); } @@ -2946,7 +2946,7 @@ namespace diskann { << std::endl; return -1; } - std::memset(ret_data, 0, (size_t) _aligned_dim * _nd * sizeof(T)); + std::memset((void*) ret_data, 0, (size_t) _aligned_dim * _nd * sizeof(T)); std::memcpy(ret_data, _data, (size_t) (_aligned_dim) *_nd * sizeof(T)); tag_to_location = _tag_to_location; return 0; @@ -3138,15 +3138,23 @@ namespace diskann { template class Index; template class Index; template class Index; + template class Index; + template class Index; template class Index; template class Index; template class Index; + template class Index; + template class Index; template class Index; template class Index; template class Index; + template class Index; + template class Index; template class Index; template class Index; template class Index; + template class Index; + template class Index; template std::pair Index::search(const float *query, const size_t K, @@ -3174,6 +3182,30 @@ namespace diskann { Index::search(const int8_t *query, const size_t K, const unsigned L, uint32_t *indices, float *distances); + template std::pair + Index::search(const knowhere::fp16 *query, + const size_t K, + const unsigned L, + uint64_t *indices, + float *distances); + template std::pair + Index::search(const knowhere::fp16 *query, + const size_t K, + const unsigned L, + uint32_t *indices, + float *distances); + template std::pair + Index::search(const knowhere::bf16 *query, + const size_t K, + const unsigned L, + uint64_t *indices, + float *distances); + template std::pair + Index::search(const knowhere::bf16 *query, + const size_t K, + const unsigned L, + uint32_t *indices, + float *distances); // TagT==uint32_t template std::pair Index::search(const float *query, const size_t K, @@ -3202,4 +3234,29 @@ namespace diskann { const unsigned L, uint32_t *indices, float *distances); + template std::pair + Index::search(const knowhere::fp16 *query, + const size_t K, + const unsigned L, + uint64_t *indices, + float *distances); + template std::pair + Index::search(const knowhere::fp16 *query, + const size_t K, + const unsigned L, + uint32_t *indices, + float *distances); + template std::pair + Index::search(const knowhere::bf16 *query, + const size_t K, + const unsigned L, + uint64_t *indices, + float *distances); + template std::pair + Index::search(const knowhere::bf16 *query, + const size_t K, + const unsigned L, + uint32_t *indices, + float *distances); + } // namespace diskann diff --git a/thirdparty/DiskANN/src/partition_and_pq.cpp b/thirdparty/DiskANN/src/partition_and_pq.cpp index 5d8ff7220..20afddec9 100644 --- a/thirdparty/DiskANN/src/partition_and_pq.cpp +++ b/thirdparty/DiskANN/src/partition_and_pq.cpp @@ -134,7 +134,7 @@ void gen_random_slice(const std::string data_file, double p_val, if (rnd_val < p_val) { std::vector cur_vector_float; for (size_t d = 0; d < ndims; d++) - cur_vector_float.push_back(cur_vector_T[d]); + cur_vector_float.push_back(float(cur_vector_T[d])); sampled_vectors.push_back(cur_vector_float); } } @@ -170,7 +170,7 @@ void gen_random_slice(const T *inputdata, size_t npts, size_t ndims, if (rnd_val < p_val) { std::vector cur_vector_float; for (size_t d = 0; d < ndims; d++) - cur_vector_float.push_back(cur_vector_T[d]); + cur_vector_float.push_back((float) cur_vector_T[d]); sampled_vectors.push_back(cur_vector_float); } } @@ -178,7 +178,7 @@ void gen_random_slice(const T *inputdata, size_t npts, size_t ndims, sampled_data = new float[slice_size * ndims]; for (size_t i = 0; i < slice_size; i++) { for (size_t j = 0; j < ndims; j++) { - sampled_data[i * ndims + j] = sampled_vectors[i][j]; + sampled_data[i * ndims + j] = (float) sampled_vectors[i][j]; } } } @@ -310,7 +310,7 @@ int generate_pq_pivots(const float *passed_train_data, size_t num_train, full_pivot_data.reset(new float[num_centers * dim]); std::atomic num_chunk_done(0); - const uint32_t num_chunk_step = num_pq_chunks / COMPLETION_PERCENT; + const uint32_t num_chunk_step = num_pq_chunks / COMPLETION_PERCENT; auto thread_pool = knowhere::ThreadPool::GetGlobalBuildThreadPool(); std::vector> futures; futures.reserve(num_pq_chunks); @@ -319,7 +319,7 @@ int generate_pq_pivots(const float *passed_train_data, size_t num_train, if (cur_chunk_size == 0) continue; futures.emplace_back(thread_pool->push([&, chunk_size = cur_chunk_size, - index = i]() { + index = i]() { std::unique_ptr cur_pivot_data = std::make_unique(num_centers * chunk_size); std::unique_ptr cur_data = @@ -539,7 +539,7 @@ int generate_pq_data_from_pivots(const std::string data_file, if (cur_chunk_size == 0) continue; futures.emplace_back(thread_pool->push([&, chunk_size = cur_chunk_size, - chunk_index = i]() { + chunk_index = i]() { std::unique_ptr cur_pivot_data = std::make_unique(num_centers * chunk_size); std::unique_ptr cur_data = @@ -1065,6 +1065,12 @@ template void gen_random_slice(const std::string base_file, template void gen_random_slice(const std::string base_file, const std::string output_file, double sampling_rate); +template void gen_random_slice(const std::string base_file, + const std::string output_file, + double sampling_rate); +template void gen_random_slice(const std::string base_file, + const std::string output_file, + double sampling_rate); template void gen_random_slice(const float *inputdata, size_t npts, size_t ndims, double p_val, @@ -1077,6 +1083,16 @@ template void gen_random_slice(const int8_t *inputdata, size_t npts, size_t ndims, double p_val, float *&sampled_data, size_t &slice_size); +template void gen_random_slice(const knowhere::fp16 *inputdata, + size_t npts, size_t ndims, + double p_val, + float *&sampled_data, + size_t &slice_size); +template void gen_random_slice(const knowhere::bf16 *inputdata, + size_t npts, size_t ndims, + double p_val, + float *&sampled_data, + size_t &slice_size); template void gen_random_slice(const std::string data_file, double p_val, float *&sampled_data, size_t &slice_size, @@ -1087,6 +1103,16 @@ template void gen_random_slice(const std::string data_file, template void gen_random_slice(const std::string data_file, double p_val, float *&sampled_data, size_t &slice_size, size_t &ndims); +template void gen_random_slice(const std::string data_file, + double p_val, + float *&sampled_data, + size_t &slice_size, + size_t &ndims); +template void gen_random_slice(const std::string data_file, + double p_val, + float *&sampled_data, + size_t &slice_size, + size_t &ndims); template int partition(const std::string data_file, const float sampling_rate, size_t num_centers, @@ -1100,6 +1126,12 @@ template int partition(const std::string data_file, const float sampling_rate, size_t num_centers, size_t max_k_means_reps, const std::string prefix_path, size_t k_base); +template int partition( + const std::string data_file, const float sampling_rate, size_t num_centers, + size_t max_k_means_reps, const std::string prefix_path, size_t k_base); +template int partition( + const std::string data_file, const float sampling_rate, size_t num_centers, + size_t max_k_means_reps, const std::string prefix_path, size_t k_base); template int partition_with_ram_budget( const std::string data_file, const double sampling_rate, double ram_budget, @@ -1110,6 +1142,12 @@ template int partition_with_ram_budget( template int partition_with_ram_budget( const std::string data_file, const double sampling_rate, double ram_budget, size_t graph_degree, const std::string prefix_path, size_t k_base); +template int partition_with_ram_budget( + const std::string data_file, const double sampling_rate, double ram_budget, + size_t graph_degree, const std::string prefix_path, size_t k_base); +template int partition_with_ram_budget( + const std::string data_file, const double sampling_rate, double ram_budget, + size_t graph_degree, const std::string prefix_path, size_t k_base); template int retrieve_shard_data_from_ids(const std::string data_file, std::string idmap_filename, @@ -1120,6 +1158,12 @@ template int retrieve_shard_data_from_ids(const std::string data_file, template int retrieve_shard_data_from_ids(const std::string data_file, std::string idmap_filename, std::string data_filename); +template int retrieve_shard_data_from_ids( + const std::string data_file, std::string idmap_filename, + std::string data_filename); +template int retrieve_shard_data_from_ids( + const std::string data_file, std::string idmap_filename, + std::string data_filename); template int generate_pq_data_from_pivots( const std::string data_file, unsigned num_centers, unsigned num_pq_chunks, @@ -1130,3 +1174,9 @@ template int generate_pq_data_from_pivots( template int generate_pq_data_from_pivots( const std::string data_file, unsigned num_centers, unsigned num_pq_chunks, std::string pq_pivots_path, std::string pq_compressed_vectors_path); +template int generate_pq_data_from_pivots( + const std::string data_file, unsigned num_centers, unsigned num_pq_chunks, + std::string pq_pivots_path, std::string pq_compressed_vectors_path); +template int generate_pq_data_from_pivots( + const std::string data_file, unsigned num_centers, unsigned num_pq_chunks, + std::string pq_pivots_path, std::string pq_compressed_vectors_path); diff --git a/thirdparty/DiskANN/src/pq_flash_index.cpp b/thirdparty/DiskANN/src/pq_flash_index.cpp index 945deb7fa..4f83824d3 100644 --- a/thirdparty/DiskANN/src/pq_flash_index.cpp +++ b/thirdparty/DiskANN/src/pq_flash_index.cpp @@ -63,7 +63,7 @@ namespace diskann { diskann::Metric m) : reader(fileReader), metric(m) { if (m == diskann::Metric::INNER_PRODUCT || m == diskann::Metric::COSINE) { - if (!std::is_floating_point::value) { + if (!knowhere::KnowhereFloatTypeCheck::value) { LOG(WARNING) << "Cannot normalize integral data types." << " This may result in erroneous results or poor recall." << " Consider using L2 distance with integral data types."; @@ -134,8 +134,9 @@ namespace diskann { 8 * sizeof(float)); scratch.visited = new tsl::robin_set<_u64>(4096); - memset(scratch.coord_scratch, 0, sizeof(T) * this->aligned_dim); - memset(scratch.aligned_query_T, 0, this->aligned_dim * sizeof(T)); + memset((void *) scratch.coord_scratch, 0, sizeof(T) * this->aligned_dim); + memset((void *) scratch.aligned_query_T, 0, + this->aligned_dim * sizeof(T)); memset(scratch.aligned_query_float, 0, this->aligned_dim * sizeof(float)); ThreadData data; @@ -196,12 +197,12 @@ namespace diskann { auto ctx = this->reader->get_ctx(); nhood_cache_buf = new unsigned[num_cached_nodes * (max_degree + 1)]; - memset(nhood_cache_buf, 0, num_cached_nodes * (max_degree + 1)); + memset((void *) nhood_cache_buf, 0, num_cached_nodes * (max_degree + 1)); _u64 coord_cache_buf_len = num_cached_nodes * aligned_dim; diskann::alloc_aligned((void **) &coord_cache_buf, coord_cache_buf_len * sizeof(T), 8 * sizeof(T)); - memset(coord_cache_buf, 0, coord_cache_buf_len * sizeof(T)); + memset((void *) coord_cache_buf, 0, coord_cache_buf_len * sizeof(T)); size_t BLOCK_SIZE = 32; size_t num_blocks = DIV_ROUND_UP(num_cached_nodes, BLOCK_SIZE); @@ -568,7 +569,6 @@ namespace diskann { get_bin_metadata(pq_table_bin, pq_file_num_centroids, pq_file_dim); this->disk_index_file = disk_index_file; - if (pq_file_num_centroids != 256) { LOG(ERROR) << "Error. Number of PQ centroids is not 256. Exitting."; return -1; @@ -764,9 +764,9 @@ namespace diskann { q_dim--; } for (uint32_t i = 0; i < q_dim; i++) { - data.scratch.aligned_query_float[i] = query1[i]; + data.scratch.aligned_query_float[i] = (float) query1[i]; data.scratch.aligned_query_T[i] = query1[i]; - query_norm += query1[i] * query1[i]; + query_norm += (float) query1[i] * (float) query1[i]; } // if inner product, we also normalize the query and set the last coordinate @@ -782,7 +782,8 @@ namespace diskann { data.scratch.aligned_query_float[this->data_dim - 1] = 0; } for (uint32_t i = 0; i < q_dim; i++) { - data.scratch.aligned_query_T[i] /= query_norm; + data.scratch.aligned_query_T[i] = + (T)((float) data.scratch.aligned_query_T[i] / query_norm); data.scratch.aligned_query_float[i] /= query_norm; } } @@ -967,7 +968,7 @@ namespace diskann { filter_ratio_in < 0 ? kFilterThreshold : filter_ratio_in; bv_cnt = bitset_view.count(); #ifdef NOT_COMPILE_FOR_SWIG - double ratio = ((double)bv_cnt) / bitset_view.size(); + double ratio = ((double) bv_cnt) / bitset_view.size(); knowhere::knowhere_diskann_bitset_ratio.Observe(ratio); #endif if (bitset_view.size() == bv_cnt) { @@ -1333,7 +1334,6 @@ namespace diskann { return left.distance < right.distance; }); } - // copy k_search values for (_u64 i = 0; i < k_search; i++) { if (i >= full_retset.size()) { @@ -1436,7 +1436,8 @@ namespace diskann { const auto original_dim = data_dim - 1; memcpy(des + des_idx * original_dim, src, original_dim * sizeof(T)); for (size_t i = 0; i < original_dim; ++i) { - des[des_idx * original_dim + i] *= max_base_norm; + des[des_idx * original_dim + i] = + (T) (max_base_norm * (float) des[des_idx * original_dim + i]); } } else { memcpy(des + des_idx * data_dim, src, data_dim * sizeof(T)); @@ -1623,5 +1624,7 @@ namespace diskann { template class PQFlashIndex<_u8>; template class PQFlashIndex<_s8>; template class PQFlashIndex; + template class PQFlashIndex; + template class PQFlashIndex; } // namespace diskann