Skip to content

Commit

Permalink
LLAMA, materialization step with a vector
Browse files Browse the repository at this point in the history
  • Loading branch information
whatsthecraic committed Apr 23, 2021
1 parent a13eb61 commit 6fa773d
Show file tree
Hide file tree
Showing 14 changed files with 124 additions and 366 deletions.
4 changes: 2 additions & 2 deletions library/baseline/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ void CSR::dump_ostream(std::ostream& out) const {
* *
*****************************************************************************/
template <typename T>
vector<pair<uint64_t, T>> CSR::translate(T* values, uint64_t N) {
vector<pair<uint64_t, T>> CSR::translate(const T* __restrict values, uint64_t N) {
vector<pair<uint64_t , T>> logical_result(N);

#pragma omp parallel for
Expand All @@ -449,7 +449,7 @@ vector<pair<uint64_t, T>> CSR::translate(T* values, uint64_t N) {
}

template <typename T, bool negative_scores>
void CSR::save_results(vector<pair<uint64_t, T>> &result, const char *dump2file) {
void CSR::save_results(const vector<pair<uint64_t, T>>& result, const char* dump2file) {
assert(dump2file != nullptr);
COUT_DEBUG("save the results to: " << dump2file);

Expand Down
4 changes: 2 additions & 2 deletions library/baseline/csr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,11 @@ class CSR : public virtual LoaderInterface, public virtual RandomVertexInterface
protected:
// Helper, translate the logical vertex IDs into real vertex IDs. Materialization step at the end of a Graphalytics algorithm
template <typename T>
std::vector<std::pair<uint64_t, T>> translate(T* values, uint64_t N);
std::vector<std::pair<uint64_t, T>> translate(const T* __restrict values, uint64_t N);

// Helper, save the content of the vector to the given output file
template <typename T, bool negative_scores = true>
void save_results(std::vector<std::pair<uint64_t, T>>& result, const char* dump2file);
void save_results(const std::vector<std::pair<uint64_t, T>>& result, const char* dump2file);

public:
/**
Expand Down
9 changes: 5 additions & 4 deletions library/interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,11 @@ vector<ImplementationManifest> implementations() {
// v5 12/06/2020: OMP dynamic scheduling in the Graphalytics kernels
// v6 25/06/2020: Updates, implicitly create a vertex referred in a new edge upon first reference with the method add_edge_v2
// v7 14/04/2021: Fix the predicate in the TimeoutService
result.emplace_back("llama7", "LLAMA library", &generate_llama);
result.emplace_back("llama7-dv", "LLAMA with dense vertices", &generate_llama_dv);
result.emplace_back("llama7-dv-nobw", "LLAMA with dense vertices, no blind writes", &generate_llama_dv_nobw);
result.emplace_back("llama7-ref", "LLAMA with the GAPBS ref impl.", &generate_llama_ref);
// v8 23/04/2021: Materialization step with a vector
result.emplace_back("llama8", "LLAMA library", &generate_llama);
result.emplace_back("llama8-dv", "LLAMA with dense vertices", &generate_llama_dv);
result.emplace_back("llama8-dv-nobw", "LLAMA with dense vertices, no blind writes", &generate_llama_dv_nobw);
result.emplace_back("llama8-ref", "LLAMA with the GAPBS ref impl.", &generate_llama_ref);
#endif

#if defined(HAVE_STINGER)
Expand Down
24 changes: 8 additions & 16 deletions library/llama/llama_bfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -985,15 +985,12 @@ void LLAMAClass::bfs(uint64_t external_source_vertex_id, const char* dump2file)
bfs_bfs<ll_mlcsr_ro_graph> instance{ graph, llama_source_vertex_id, is_undirected() };
instance.prepare(llama_source_vertex_id);
instance.do_bfs_forward(timeout);

if(timeout.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
}
if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

// translate from llama vertex ids to external vertex ids
auto names = graph.get_node_property_64(g_llama_property_names);
assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
cuckoohash_map</* external id */ uint64_t, /* distance */ int> external_ids;
vector<pair<uint64_t, int64_t>> external_ids (graph.max_nodes());
#pragma omp parallel for
for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
// first, does this node exist (or it's a gap?)
Expand All @@ -1004,27 +1001,22 @@ void LLAMAClass::bfs(uint64_t external_source_vertex_id, const char* dump2file)
uint64_t external_node_id = names->get(llama_node_id);

// third, its distance
int distance = instance.get_level(llama_node_id);
int64_t distance = instance.get_level(llama_node_id);

// finally, register the association
external_ids.insert(external_node_id, distance);
}

if(timeout.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
external_ids[llama_node_id] = make_pair(external_node_id, distance);
}
if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

// store the results in the given file
if(dump2file != nullptr){
COUT_DEBUG("save the results to: " << dump2file)
fstream handle(dump2file, ios_base::out);
if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");

auto hashtable = external_ids.lock_table();

for(const auto& keyvaluepair : hashtable){
handle << keyvaluepair.first << " ";
auto distance = keyvaluepair.second;
for(const auto& p : external_ids){
handle << p.first << " ";
auto distance = p.second;
if(distance != decltype(instance)::__INVALID_LEVEL){
handle << distance;
} else {
Expand Down
48 changes: 6 additions & 42 deletions library/llama/llama_cdlp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,56 +67,20 @@ void LLAMAClass::cdlp(uint64_t max_iterations, const char* dump2file){
// dump_snapshot(graph);
slock.unlock(); // here we lose the ability to refer to m_vmap_read_only from now on

// execute the CDLP algortihm
// execute the CDLP algorithm
unique_ptr<uint64_t[]> ptr_labels = cdlp_impl(timeout, graph, max_iterations);
uint64_t* labels = ptr_labels.get();

if(timeout.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
}
if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

// translate from llama vertex ids to external vertex ids
auto names = graph.get_node_property_64(g_llama_property_names);
assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
cuckoohash_map</* external id */ uint64_t, /* score */ uint64_t> external_ids;
#pragma omp parallel for
for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
// first, does this node exist (or it's a gap?)
// this is a bit of a stretch: the impl~ from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
if(!graph.node_exists(llama_node_id)) continue;

// second, what's it's real node ID, in the external domain (e.g. user id)
uint64_t external_node_id = names->get(llama_node_id);

// third, its label
uint64_t label = labels[llama_node_id];

// finally, register the association
external_ids.insert(external_node_id, label);
}

if(timeout.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
}
auto external_ids = translate(graph, ptr_labels.get());
if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

#if defined(LL_COUNTERS)
ll_print_counters(stdout);
#endif

// store the results in the given file
if(dump2file != nullptr){
COUT_DEBUG("save the results to: " << dump2file)
fstream handle(dump2file, ios_base::out);
if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");

auto hashtable = external_ids.lock_table();

for(const auto& keyvaluepair : hashtable){
handle << keyvaluepair.first << " " << keyvaluepair.second << "\n";
}

handle.close();
}
if(dump2file != nullptr) // store the results in the given file
save_results(external_ids, dump2file);
}

unique_ptr<uint64_t[]> LLAMAClass::cdlp_impl(TimeoutService& timer, ll_mlcsr_ro_graph& graph, uint64_t max_iterations){
Expand Down
64 changes: 64 additions & 0 deletions library/llama/llama_class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llama_internal.hpp"

#include <cmath>
#include <fstream>
#include <iostream>
#include <mutex>
#include <shared_mutex> // shared_lock
Expand Down Expand Up @@ -632,6 +633,69 @@ void LLAMAClass::updates_stop(){
}
#endif

/*****************************************************************************
* *
* Graphalytics Helpers *
* *
*****************************************************************************/

/**
 * Helper for Graphalytics, materialization step at the end of a kernel: pair each score in `data`
 * with the external vertex ID read from the node property `g_llama_property_names`.
 *
 * @param graph the LLAMA graph the scores refer to
 * @param data the scores, indexed by LLAMA node id. It is read at every id in [0, graph.max_nodes())
 *        for which graph.node_exists() holds (presumably it has at least graph.max_nodes() entries; verify against the callers)
 * @return one pair <external vertex id, score> for each node that exists in the graph, in increasing
 *         order of LLAMA node id. Gaps (ids for which node_exists() is false) do not appear in the result.
 */
template <typename T>
vector<pair<uint64_t, T>> LLAMAClass::translate(ll_mlcsr_ro_graph& graph, const T* __restrict data) {
    const node_t N = graph.max_nodes(); // this is already a bit of a stretch
    auto names = graph.get_node_property_64(g_llama_property_names);
    assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");

    vector<pair<uint64_t, T>> output(N);
    // one flag per slot, to tell apart the translated nodes from the gaps. Use char rather than bool:
    // vector<bool> packs its elements into bits, so concurrent writes to distinct elements would race.
    vector<char> is_present(N, 0);

    #pragma omp parallel for
    for(node_t llama_node_id = 0; llama_node_id < N; llama_node_id++){
        // first, does this node exist (or it's a gap?)
        // this is a bit of a stretch: the impl~ from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
        if(!graph.node_exists(llama_node_id)) continue;

        // second, what is its real node ID, in the external domain (e.g. user id)
        uint64_t external_node_id = names->get(llama_node_id);

        // third, its score
        T score = data[llama_node_id];

        // finally, register the association
        output[llama_node_id] = make_pair(external_node_id, score);
        is_present[llama_node_id] = 1;
    }

    // remove the slots of the gaps: they were never assigned and would otherwise be reported
    // as spurious results <0, T()> once the vector is saved to a file
    node_t num_present = 0;
    for(node_t llama_node_id = 0; llama_node_id < N; llama_node_id++){
        if(!is_present[llama_node_id]) continue;
        if(num_present != llama_node_id){ output[num_present] = output[llama_node_id]; }
        num_present++;
    }
    output.resize(num_present);

    return output;
}

template <typename T, bool negative_scores>
void LLAMAClass::save_results(const vector<pair<uint64_t, T>>& result, const char* dump2file) {
assert(dump2file != nullptr);
COUT_DEBUG("save the results to: " << dump2file);

fstream handle(dump2file, ios_base::out);
if (!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");

for (const auto &p : result) {
handle << p.first << " ";

if(!negative_scores && p.second < 0){
handle << numeric_limits<T>::max();
} else {
handle << p.second;
}

handle << "\n";
}

handle.close();
}

// Explicitly instantiate the templates.
// Both translate() and save_results() are only declared in llama_class.hpp and defined in this
// source file, while they are invoked from other source files (e.g. llama_cdlp.cpp, llama_lcc.cpp,
// llama_pagerank.cpp). For each TYPE, the macro below instantiates translate<TYPE> together with
// both variants of save_results, that is, with negative_scores set to true and to false.
#define INSTANTIATE_TRANSLATE( TYPE ) template vector<pair<uint64_t, TYPE>> LLAMAClass::translate<TYPE>(ll_mlcsr_ro_graph& graph, const TYPE* __restrict data); \
template void LLAMAClass::save_results<TYPE, true>(const vector<pair<uint64_t, TYPE>>& result, const char* dump2file); \
template void LLAMAClass::save_results<TYPE, false>(const vector<pair<uint64_t, TYPE>>& result, const char* dump2file);

INSTANTIATE_TRANSLATE( int64_t );
INSTANTIATE_TRANSLATE( uint64_t );
INSTANTIATE_TRANSLATE( double );

/*****************************************************************************
* *
* Dump *
Expand Down
7 changes: 7 additions & 0 deletions library/llama/llama_class.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ class LLAMAClass : public virtual UpdateInterface, public virtual GraphalyticsIn
// Internal implementation of the CDLP algorithm
std::unique_ptr<uint64_t[]> cdlp_impl(utility::TimeoutService& timer, ll_mlcsr_ro_graph& graph, uint64_t max_iterations);

// Helper for Graphalytics: translate the logical IDs into external IDs
template <typename T>
std::vector<std::pair<uint64_t, T>> translate(ll_mlcsr_ro_graph& graph, const T* __restrict data);

// Helper, save the content of the vector to the given output file
template <typename T, bool negative_scores = true>
void save_results(const std::vector<std::pair<uint64_t, T>>& result, const char* dump2file);
public:
/**
* Constructor
Expand Down
45 changes: 5 additions & 40 deletions library/llama/llama_lcc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,53 +434,18 @@ void LLAMAClass::lcc(const char* dump2file){
} else {
llama_execute_lcc_undirected(timeout, graph, /* output */ scores);
}

if(timeout.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
}
if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

// translate from llama vertex ids to external vertex ids
auto names = graph.get_node_property_64(g_llama_property_names);
assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
cuckoohash_map</* external id */ uint64_t, /* score */ double> external_ids;
#pragma omp parallel for
for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
// first, does this node exist (or it's a gap?)
// this is a bit of a stretch: the impl~ from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
if(!graph.node_exists(llama_node_id)) continue;

// second, what's it's real node ID, in the external domain (e.g. user id)
uint64_t external_node_id = names->get(llama_node_id);

// third, its score
double score = scores[llama_node_id];

// finally, register the association
external_ids.insert(external_node_id, score);
}

if(timeout.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
}
auto external_ids = translate(graph, scores);
if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

#if defined(LL_COUNTERS)
ll_print_counters(stdout);
#endif

// store the results in the given file
if(dump2file != nullptr){
COUT_DEBUG("save the results to: " << dump2file)
fstream handle(dump2file, ios_base::out);
if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");

auto hashtable = external_ids.lock_table();

for(const auto& keyvaluepair : hashtable){
handle << keyvaluepair.first << " " << keyvaluepair.second << "\n";
}

handle.close();
}
if(dump2file != nullptr) // store the results in the given file
save_results(external_ids, dump2file);
}

} // namespace
Expand Down
45 changes: 5 additions & 40 deletions library/llama/llama_pagerank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,49 +68,14 @@ void LLAMAClass::pagerank(uint64_t num_iterations, double damping_factor, const
unique_ptr<double[]> ptr_rank { new double[graph.max_nodes()] };
double* rank = ptr_rank.get();
pagerank_impl(timeout_srv, graph, current_num_vertices, num_iterations, damping_factor, /* output */ rank);

if(timeout_srv.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
}
if(timeout_srv.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

// translate from llama ids to external vertex ids
auto names = graph.get_node_property_64(g_llama_property_names);
assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
cuckoohash_map</* external id */ uint64_t, /* distance */ double> external_ids;
#pragma omp parallel for
for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
// first, does this node exist (or it's a gap?)
// this is a bit of a stretch: the impl~ from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
if(!graph.node_exists(llama_node_id)) continue;

// second, what's it's real node ID, in the external domain (e.g. user id)
uint64_t external_node_id = names->get(llama_node_id);

// third, its distance
double distance = rank[llama_node_id];

// finally, register the association
external_ids.insert(external_node_id, distance);
}
auto external_ids = translate(graph, rank);
if(timeout_srv.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }

if(timeout_srv.is_timeout()){
RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
}

// store the results in the given file
if(dump2file != nullptr){
COUT_DEBUG("save the results to: " << dump2file)
fstream handle(dump2file, ios_base::out);
if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");

auto hashtable = external_ids.lock_table();

for(const auto& keyvaluepair : hashtable){
handle << keyvaluepair.first << " " << keyvaluepair.second << "\n";
}

handle.close();
}
if(dump2file != nullptr) // store the results in the given file
save_results(external_ids, dump2file);
}

// Implementation derived from llama/benchmark/benchmarks/pagerank.h, class ll_b_pagerank_pull_ext
Expand Down
Loading

0 comments on commit 6fa773d

Please sign in to comment.