Skip to content

Commit

Permalink
BANN single file Save and Load.
Browse files Browse the repository at this point in the history
  • Loading branch information
REDMOND\ninchen committed Jan 12, 2024
1 parent ee62d8d commit df84a6d
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 46 deletions.
8 changes: 8 additions & 0 deletions include/abstract_graph_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <vector>
#include "types.h"

class AlignedFileReader;

namespace diskann
{

Expand All @@ -21,8 +23,14 @@ class AbstractGraphStore
virtual ~AbstractGraphStore() = default;

// Loads the graph; returns tuple of <nodes_read, start, num_frozen_points>.
// Exactly one of the two overloads below exists in a given build: the
// AlignedFileReader variant when EXEC_ENV_OLS is defined, the path-string
// variant otherwise. Concrete stores must implement whichever one is active.
// NOTE(review): `offset` is presumably the byte position at which the graph
// section begins inside the source - confirm against the implementations.
#ifdef EXEC_ENV_OLS
virtual std::tuple<uint32_t, uint32_t, size_t> load(AlignedFileReader &reader, const size_t num_points,
size_t offset) = 0;
#else
virtual std::tuple<uint32_t, uint32_t, size_t> load(const std::string &index_path_prefix, const size_t num_points,
size_t offset) = 0;
#endif

virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_fz_points,
const uint32_t start) = 0;

Expand Down
2 changes: 1 addition & 1 deletion include/defaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const uint32_t NUM_FROZEN_POINTS_STATIC = 0;
const uint32_t NUM_FROZEN_POINTS_DYNAMIC = 1;

// In-mem index related limits
// Defined exactly once. The `f` suffix makes the literal a float, so the
// initialization involves no implicit double-to-float conversion.
const float GRAPH_SLACK_FACTOR = 1.3f;

// SSD Index related limits
const uint64_t MAX_GRAPH_DEGREE = 512;
Expand Down
12 changes: 9 additions & 3 deletions include/in_mem_graph_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@ class InMemGraphStore : public AbstractGraphStore
InMemGraphStore(const size_t total_pts, const size_t reserve_graph_degree);

// returns tuple of <nodes_read, start, num_frozen_points>
virtual std::tuple<uint32_t, uint32_t, size_t> load(const std::string &index_path_prefix, const size_t num_points,
#ifdef EXEC_ENV_OLS
virtual std::tuple<uint32_t, uint32_t, size_t> load(AlignedFileReader &reader, const size_t num_points,
size_t offset) override;
#else
virtual std::tuple<uint32_t, uint32_t, size_t> load(const std::string &filename, size_t expected_num_points,
size_t offset);
#endif
virtual int store(const std::string &index_path_prefix, const size_t num_points, const size_t num_frozen_points,
const uint32_t start) override;
virtual int store(std::ofstream &writer, const size_t num_points, const size_t num_fz_points, const uint32_t start,
Expand All @@ -34,11 +39,12 @@ class InMemGraphStore : public AbstractGraphStore
virtual uint32_t get_max_observed_degree() override;

protected:
// Implementation hook that load() forwards to; returns the same
// <nodes_read, start, num_frozen_points> tuple.
// The two overloads are mutually exclusive, mirroring the #ifdef/#else split
// of their definitions: the AlignedFileReader variant exists only when
// EXEC_ENV_OLS is defined, the filename variant only when it is not.
#ifdef EXEC_ENV_OLS
virtual std::tuple<uint32_t, uint32_t, size_t> load_impl(AlignedFileReader &reader, size_t expected_num_points,
size_t offset);
#else
virtual std::tuple<uint32_t, uint32_t, size_t> load_impl(const std::string &filename, size_t expected_num_points,
size_t offset);
#endif

int save_graph(std::ofstream &writer, const size_t active_points, const size_t num_frozen_points,
Expand Down
2 changes: 2 additions & 0 deletions include/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ struct SaveLoadMetaDataV1
uint64_t delete_list_offset;
uint64_t tags_offset;
uint64_t graph_offset;

SaveLoadMetaDataV1();
};

inline double estimate_ram_usage(size_t size, uint32_t dim, uint32_t datasize, uint32_t degree)
Expand Down
16 changes: 8 additions & 8 deletions include/parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,7 @@ class IndexWriteParameters

{
public:
const uint32_t search_list_size; // L
const uint32_t max_degree; // R
const bool saturate_graph;
const uint32_t max_occlusion_size; // C
const float alpha;
const uint32_t num_threads;
const uint32_t filter_list_size; // Lf

private:
IndexWriteParameters(const uint32_t search_list_size, const uint32_t max_degree, const bool saturate_graph,
const uint32_t max_occlusion_size, const float alpha, const uint32_t num_threads,
const uint32_t filter_list_size)
Expand All @@ -34,6 +26,14 @@ class IndexWriteParameters
{
}

const uint32_t search_list_size; // L
const uint32_t max_degree; // R
const bool saturate_graph;
const uint32_t max_occlusion_size; // C
const float alpha;
const uint32_t num_threads;
const uint32_t filter_list_size; // Lf

friend class IndexWriteParametersBuilder;
};

Expand Down
15 changes: 13 additions & 2 deletions src/in_mem_graph_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "in_mem_graph_store.h"
#include "utils.h"


namespace diskann
{
InMemGraphStore::InMemGraphStore(const size_t total_pts, const size_t reserve_graph_degree)
Expand All @@ -16,11 +17,21 @@ InMemGraphStore::InMemGraphStore(const size_t total_pts, const size_t reserve_gr
}
}

#ifdef EXEC_ENV_OLS
// EXEC_ENV_OLS build: reads the graph through the supplied AlignedFileReader.
// Pure forwarder - all work happens in load_impl, whose
// <nodes_read, start, num_frozen_points> tuple is returned unchanged.
auto InMemGraphStore::load(AlignedFileReader &reader, const size_t num_points, size_t offset)
    -> std::tuple<uint32_t, uint32_t, size_t>
{
    auto loaded = load_impl(reader, num_points, offset);
    return loaded;
}
#else
// Default build: reads the graph from the file named by index_path_prefix.
// Pure forwarder - all work happens in load_impl, whose
// <nodes_read, start, num_frozen_points> tuple is returned unchanged.
auto InMemGraphStore::load(const std::string &index_path_prefix, const size_t num_points, size_t offset)
    -> std::tuple<uint32_t, uint32_t, size_t>
{
    auto loaded = load_impl(index_path_prefix, num_points, offset);
    return loaded;
}
#endif
int InMemGraphStore::store(const std::string &index_path_prefix, const size_t num_points,
const size_t num_frozen_points, const uint32_t start)
{
Expand Down Expand Up @@ -90,7 +101,6 @@ std::tuple<uint32_t, uint32_t, size_t> InMemGraphStore::load_impl(AlignedFileRea
size_t file_frozen_pts;
uint32_t start;

auto max_points = get_max_points();
int header_size = 2 * sizeof(size_t) + 2 * sizeof(uint32_t);
std::unique_ptr<char[]> header = std::make_unique<char[]>(header_size);
read_array(reader, header.get(), header_size, offset);
Expand Down Expand Up @@ -143,8 +153,8 @@ std::tuple<uint32_t, uint32_t, size_t> InMemGraphStore::load_impl(AlignedFileRea
<< std::endl;
return std::make_tuple(nodes_read, start, file_frozen_pts);
}
#endif

#else
std::tuple<uint32_t, uint32_t, size_t> InMemGraphStore::load_impl(const std::string &filename,
size_t expected_num_points, size_t offset)
{
Expand Down Expand Up @@ -208,6 +218,7 @@ std::tuple<uint32_t, uint32_t, size_t> InMemGraphStore::load_impl(const std::str
<< std::endl;
return std::make_tuple(nodes_read, start, file_frozen_pts);
}
#endif

int InMemGraphStore::save_graph(std::ofstream &writer, const size_t num_points, const size_t num_frozen_points,
const uint32_t start, size_t offset)
Expand Down
97 changes: 66 additions & 31 deletions src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@

namespace diskann
{
SaveLoadMetaDataV1::SaveLoadMetaDataV1() : data_offset(0), delete_list_offset(0), tags_offset(0), graph_offset(0)
{
}


// Initialize an index with metric m, load the data of type T with filename
// (bin), and initialize max_points
template <typename T, typename TagT, typename LabelT>
Expand Down Expand Up @@ -411,16 +416,16 @@ void Index<T, TagT, LabelT>::save(const char *filename, bool compact_before_save
curr_pos += sizeof(SaveLoadMetaDataV1);

// Save data.
metadata.data_offset = static_cast<uint64_t>(curr_pos);
curr_pos += _data_store->save(writer, (location_t)(_nd + _num_frozen_pts), curr_pos);
{
metadata.data_offset = static_cast<uint64_t>(curr_pos);
curr_pos += _data_store->save(writer, (location_t)(_nd + _num_frozen_pts), curr_pos);
}

// Save delete list.
{
if (_delete_set->size() == 0)
{
metadata.delete_list_offset = static_cast<uint64_t>(curr_pos);
}
else
metadata.delete_list_offset = static_cast<uint64_t>(curr_pos);

if (_delete_set->size() != 0)
{
std::unique_ptr<uint32_t[]> delete_list = std::make_unique<uint32_t[]>(_delete_set->size());
uint32_t i = 0;
Expand All @@ -434,12 +439,9 @@ void Index<T, TagT, LabelT>::save(const char *filename, bool compact_before_save

// Save tags.
{
if (!_enable_tags)
{
diskann::cout << "Not saving tags as they are not enabled." << std::endl;
metadata.tags_offset = static_cast<uint64_t>(curr_pos);
}
else
metadata.tags_offset = static_cast<uint64_t>(curr_pos);

if (_enable_tags)
{
TagT *tag_data = new TagT[_nd + _num_frozen_pts];
for (uint32_t i = 0; i < _nd; i++)
Expand All @@ -466,17 +468,24 @@ void Index<T, TagT, LabelT>::save(const char *filename, bool compact_before_save
}

// Save graph.
metadata.graph_offset = static_cast<uint64_t>(curr_pos);
curr_pos += _graph_store->store(writer, _nd + _num_frozen_pts, _num_frozen_pts, _start, curr_pos);
{
metadata.graph_offset = static_cast<uint64_t>(curr_pos);
_graph_store->store(writer, _nd + _num_frozen_pts, _num_frozen_pts, _start, curr_pos);

// Save metadata.
writer.seekp(meta_data_start, writer.beg);
writer.write((char *)&metadata, sizeof(SaveLoadMetaDataV1));
writer.close();
// Save metadata.
writer.seekp(meta_data_start, writer.beg);
writer.write((char *)&metadata, sizeof(SaveLoadMetaDataV1));
writer.close();
}

std::cout << "Metadata Saved. data_offset: " << std::to_string(metadata.data_offset)
<< " delete_list_offset: " << std::to_string(metadata.delete_list_offset)
<< " tag_offset: " << std::to_string(metadata.tags_offset)
<< " graph_offset: " << std::to_string(metadata.graph_offset) << std::endl;
}
else
{
diskann::cout << "Save index in a single file currently only support _save_as_one_file_version = 1. "
std::cout << "Save index in a single file currently only support _save_as_one_file_version = 1. "
"Not saving the index."
<< std::endl;
}
Expand All @@ -487,7 +496,7 @@ void Index<T, TagT, LabelT>::save(const char *filename, bool compact_before_save
// _max_points.
reposition_frozen_point_to_end();

diskann::cout << "Time taken for save: " << timer.elapsed() / 1000000.0 << "s." << std::endl;
std::cout << "Time taken for save: " << timer.elapsed() / 1000000.0 << "s." << std::endl;
}

#ifdef EXEC_ENV_OLS
Expand Down Expand Up @@ -647,6 +656,7 @@ void Index<T, TagT, LabelT>::load(const char *filename, uint32_t num_threads, ui
#endif
if (!_load_from_one_file)
{
std::cout << "DLVS should not load multiple files." << std::endl;
// For DLVS Store, we will not support saving the index in multiple
// files.
#ifndef EXEC_ENV_OLS
Expand All @@ -670,15 +680,18 @@ void Index<T, TagT, LabelT>::load(const char *filename, uint32_t num_threads, ui
{
if (_filtered_index)
{
diskann::cout << "Single index file saving/loading support for filtered index is not yet "
std::cout << "Single index file saving/loading support for filtered index is not yet "
"enabled. Not loading the index."
<< std::endl;
}
else
{
uint64_t version;
std::cout << "Start loading index from one file." << std::endl;
uint64_t version = 0;

#ifdef EXEC_ENV_OLS
std::cout << "Start Version Check." << std::endl;

std::vector<AlignedRead> readReqs;
AlignedRead readReq;
uint64_t buf[1];
Expand All @@ -687,46 +700,67 @@ void Index<T, TagT, LabelT>::load(const char *filename, uint32_t num_threads, ui
readReq.offset = 0;
readReq.len = sizeof(uint64_t);
readReqs.push_back(readReq);
std::cout << "Load Version request is ready." << std::endl;

reader.read(readReqs, ctx); // synchronous
if ((*(ctx.m_pRequestsStatus))[0] == IOContext::READ_SUCCESS)
std::cout << "Load Version processed." << std::endl;

if ((*(ctx.m_pRequestsStatus.get()))[0] == IOContext::READ_SUCCESS)
{
version = buf[0];
std::cout << "Load Version is " << std::to_string(version) << "." << std::endl;
}
else
{
std::stringstream str;
str << "Could not read binary metadata from index file at offset: 0." << std::endl;
std::cout << str.str() << std::endl;
throw diskann::ANNException(str.str(), -1, __FUNCSIG__, __FILE__, __LINE__);
}

#else
std::ifstream reader(filename, std::ios::binary);
reader.read((char *)&version, sizeof(uint64_t));
#endif

if (version == _load_from_one_file_version)
{
std::cout << "Version Check passed, start loading meta data." << std::endl;
SaveLoadMetaDataV1 metadata;

#ifdef EXEC_ENV_OLS
std::vector<AlignedRead> metadata_readReqs;
AlignedRead metadata_readReq;
uint64_t metadata_buf[1];
uint64_t metadata_buf[sizeof(SaveLoadMetaDataV1)];

metadata_readReq.buf = metadata_buf;
metadata_readReq.offset = sizeof(uint64_t);
metadata_readReq.len = sizeof(SaveLoadMetaDataV1);
metadata_readReq.push_back(readReq);
metadata_readReqs.push_back(metadata_readReq);
reader.read(metadata_readReqs, ctx); // synchronous
if ((*(ctx.m_pRequestsStatus))[0] == IOContext::READ_SUCCESS)
{
memcpy((void *)&metadata, (void *)buf, sizeof(SaveLoadMetaDataV1));
}

std::cout << "Metadata loaded. data_offset: " << std::to_string(metadata.data_offset)
<< " delete_list_offset: " << std::to_string(metadata.delete_list_offset)
<< " tag_offset: " << std::to_string(metadata.tags_offset)
<< " graph_offset: " << std::to_string(metadata.graph_offset)
<< std::endl;

#else
reader.read((char *)&metadata, sizeof(SaveLoadMetaDataV1));
#endif
// Load data
#ifdef EXEC_ENV_OLS
load_data(reader, metadata.data_offset)
load_data(reader, metadata.data_offset);
#else
load_data(filename, metadata.data_offset);
#endif

// Load delete list when presents.
if (metadata.data_offset != metadata.delete_list_offset)
if (metadata.data_offset != metadata.delete_list_offset)
{
#ifdef EXEC_ENV_OLS
load_delete_set(reader, metadata.delete_list_offset);
Expand All @@ -752,12 +786,11 @@ void Index<T, TagT, LabelT>::load(const char *filename, uint32_t num_threads, ui
}
else
{
diskann::cout << "load index from a single file currently only support _save_as_one_file_version = 1. "
std::cout << "load index from a single file currently only support _save_as_one_file_version = 1. "
"Not loading the index."
<< std::endl;
}
}
return;
}

if (data_file_num_pts != graph_num_pts || (data_file_num_pts != tags_file_num_pts && _enable_tags))
Expand Down Expand Up @@ -866,13 +899,15 @@ size_t Index<T, TagT, LabelT>::get_graph_num_frozen_points(const std::string &gr
template <typename T, typename TagT, typename LabelT>
size_t Index<T, TagT, LabelT>::load_graph(AlignedFileReader &reader, size_t expected_num_points, size_t offset)
{
auto res = _graph_store->load(reader, expected_num_points, offset);

#else

template <typename T, typename TagT, typename LabelT>
size_t Index<T, TagT, LabelT>::load_graph(std::string filename, size_t expected_num_points, size_t offset)
{
#endif
auto res = _graph_store->load(filename, expected_num_points, offset);
#endif
_start = std::get<1>(res);
_num_frozen_pts = std::get<2>(res);
return std::get<0>(res);
Expand Down
2 changes: 1 addition & 1 deletion src/pq_flash_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,7 @@ int PQFlashIndex<T, LabelT>::load_from_separate_paths(uint32_t num_threads, cons
{
uint64_t dumr, dumc;
float *norm_val;
diskann::load_bin<float>(files, norm_val, dumr, dumc);
diskann::load_bin<float>(files, norm_file, norm_val, dumr, dumc);
#else
if (file_exists(norm_file) && metric == diskann::Metric::INNER_PRODUCT)
{
Expand Down

0 comments on commit df84a6d

Please sign in to comment.