LLAMA, materialization step with a vector

cwida · Apr 23, 2021 · 6fa773d · 6fa773d
1 parent a13eb61
commit 6fa773d
Show file tree

Hide file tree

Showing 14 changed files with 124 additions and 366 deletions.
diff --git a/library/baseline/csr.cpp b/library/baseline/csr.cpp
@@ -438,7 +438,7 @@ void CSR::dump_ostream(std::ostream& out) const {
  *                                                                           *
  *****************************************************************************/
 template <typename T>
-vector<pair<uint64_t, T>> CSR::translate(T* values, uint64_t N) {
+vector<pair<uint64_t, T>> CSR::translate(const T* __restrict values, uint64_t N) {
     vector<pair<uint64_t , T>> logical_result(N);
 
     #pragma omp parallel for
@@ -449,7 +449,7 @@ vector<pair<uint64_t, T>> CSR::translate(T* values, uint64_t N) {
 }
 
 template <typename T, bool negative_scores>
-void CSR::save_results(vector<pair<uint64_t, T>> &result, const char *dump2file) {
+void CSR::save_results(const vector<pair<uint64_t, T>>& result, const char* dump2file) {
     assert(dump2file != nullptr);
     COUT_DEBUG("save the results to: " << dump2file);
 

diff --git a/library/baseline/csr.hpp b/library/baseline/csr.hpp
@@ -110,11 +110,11 @@ class CSR : public virtual LoaderInterface, public virtual RandomVertexInterface
 protected:
     // Helper, translate the logical into real vertices IDs. Materialization step at the end of a graphalytics algorithm
     template <typename T>
-    std::vector<std::pair<uint64_t, T>> translate(T* values, uint64_t N);
+    std::vector<std::pair<uint64_t, T>> translate(const T* __restrict values, uint64_t N);
 
     // Helper, save the content of the vector to the given output file
     template <typename T, bool negative_scores = true>
-    void save_results(std::vector<std::pair<uint64_t, T>>& result, const char* dump2file);
+    void save_results(const std::vector<std::pair<uint64_t, T>>& result, const char* dump2file);
 
 public:
     /**

diff --git a/library/interface.cpp b/library/interface.cpp
@@ -236,10 +236,11 @@ vector<ImplementationManifest> implementations() {
     // v5 12/06/2020: OMP dynamic scheduling in the Graphalytics kernels
     // v6 25/06/2020: Updates, implicitly create a vertex referred in a new edge upon first reference with the method add_edge_v2
     // v7 14/04/2021: Fix the predicate in the TimeoutService
-    result.emplace_back("llama7", "LLAMA library", &generate_llama);
-    result.emplace_back("llama7-dv", "LLAMA with dense vertices", &generate_llama_dv);
-    result.emplace_back("llama7-dv-nobw", "LLAMA with dense vertices, no blind writes", &generate_llama_dv_nobw);
-    result.emplace_back("llama7-ref", "LLAMA with the GAPBS ref impl.", &generate_llama_ref);
+    // v8 23/04/2021: Materialization step with a vector
+    result.emplace_back("llama8", "LLAMA library", &generate_llama);
+    result.emplace_back("llama8-dv", "LLAMA with dense vertices", &generate_llama_dv);
+    result.emplace_back("llama8-dv-nobw", "LLAMA with dense vertices, no blind writes", &generate_llama_dv_nobw);
+    result.emplace_back("llama8-ref", "LLAMA with the GAPBS ref impl.", &generate_llama_ref);
 #endif
 
 #if defined(HAVE_STINGER)

diff --git a/library/llama/llama_bfs.cpp b/library/llama/llama_bfs.cpp
@@ -985,15 +985,12 @@ void LLAMAClass::bfs(uint64_t external_source_vertex_id, const char* dump2file)
     bfs_bfs<ll_mlcsr_ro_graph> instance{ graph, llama_source_vertex_id, is_undirected() };
     instance.prepare(llama_source_vertex_id);
     instance.do_bfs_forward(timeout);
-
-    if(timeout.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
-    }
+    if(timeout.is_timeout()){  RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
     // translate from llama vertex ids to external vertex ids
     auto names = graph.get_node_property_64(g_llama_property_names);
     assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
-    cuckoohash_map</* external id */ uint64_t, /* distance */ int> external_ids;
+    vector<pair<uint64_t, int64_t>> external_ids (graph.max_nodes());
     #pragma omp parallel for
     for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
         // first, does this node exist (or it's a gap?)
@@ -1004,27 +1001,22 @@ void LLAMAClass::bfs(uint64_t external_source_vertex_id, const char* dump2file)
         uint64_t external_node_id = names->get(llama_node_id);
 
         // third, its distance
-        int distance = instance.get_level(llama_node_id);
+        int64_t distance = instance.get_level(llama_node_id);
 
         // finally, register the association
-        external_ids.insert(external_node_id, distance);
-    }
-
-    if(timeout.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
+        external_ids[llama_node_id] = make_pair(external_node_id, distance);
     }
+    if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
     // store the results in the given file
     if(dump2file != nullptr){
         COUT_DEBUG("save the results to: " << dump2file)
         fstream handle(dump2file, ios_base::out);
         if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");
 
-        auto hashtable = external_ids.lock_table();
-
-        for(const auto& keyvaluepair : hashtable){
-            handle << keyvaluepair.first << " ";
-            auto distance = keyvaluepair.second;
+        for(const auto& p : external_ids){
+            handle << p.first << " ";
+            auto distance = p.second;
             if(distance != decltype(instance)::__INVALID_LEVEL){
                 handle << distance;
             } else {

diff --git a/library/llama/llama_cdlp.cpp b/library/llama/llama_cdlp.cpp
@@ -67,56 +67,20 @@ void LLAMAClass::cdlp(uint64_t max_iterations, const char* dump2file){
 //    dump_snapshot(graph);
     slock.unlock(); // here we lose the ability to refer to m_vmap_read_only from now on
 
-    // execute the CDLP algortihm
+    // execute the CDLP algorithm
     unique_ptr<uint64_t[]> ptr_labels = cdlp_impl(timeout, graph, max_iterations);
-    uint64_t* labels = ptr_labels.get();
-
-    if(timeout.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
-    }
+    if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
     // translate from llama vertex ids to external vertex ids
-    auto names = graph.get_node_property_64(g_llama_property_names);
-    assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
-    cuckoohash_map</* external id */ uint64_t, /* score */ uint64_t> external_ids;
-    #pragma omp parallel for
-    for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
-        // first, does this node exist (or it's a gap?)
-        // this is a bit of a stretch: the impl~ from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
-        if(!graph.node_exists(llama_node_id)) continue;
-
-        // second, what's it's real node ID, in the external domain (e.g. user id)
-        uint64_t external_node_id = names->get(llama_node_id);
-
-        // third, its label
-        uint64_t label = labels[llama_node_id];
-
-        // finally, register the association
-        external_ids.insert(external_node_id, label);
-    }
-
-    if(timeout.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
-    }
+    auto external_ids = translate(graph, ptr_labels.get());
+    if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
 #if defined(LL_COUNTERS)
     ll_print_counters(stdout);
 #endif
 
-    // store the results in the given file
-    if(dump2file != nullptr){
-        COUT_DEBUG("save the results to: " << dump2file)
-        fstream handle(dump2file, ios_base::out);
-        if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");
-
-        auto hashtable = external_ids.lock_table();
-
-        for(const auto& keyvaluepair : hashtable){
-            handle << keyvaluepair.first << " " << keyvaluepair.second << "\n";
-        }
-
-        handle.close();
-    }
+    if(dump2file != nullptr) // store the results in the given file
+        save_results(external_ids, dump2file);
 }
 
 unique_ptr<uint64_t[]> LLAMAClass::cdlp_impl(TimeoutService& timer, ll_mlcsr_ro_graph& graph, uint64_t max_iterations){

diff --git a/library/llama/llama_class.cpp b/library/llama/llama_class.cpp
@@ -19,6 +19,7 @@
 #include "llama_internal.hpp"
 
 #include <cmath>
+#include <fstream>
 #include <iostream>
 #include <mutex>
 #include <shared_mutex> // shared_lock
@@ -632,6 +633,69 @@ void LLAMAClass::updates_stop(){
 }
 #endif
 
+/*****************************************************************************
+ *                                                                           *
+ *  Graphalytics Helpers                                                     *
+ *                                                                           *
+ *****************************************************************************/
+
+/**
+ * Materialization step at the end of a Graphalytics kernel: pair the value computed for each LLAMA vertex
+ * with the external ID of that vertex.
+ *
+ * @param graph the LLAMA graph used to enumerate the vertices and to read their external IDs
+ * @param data the values to translate, indexed by LLAMA vertex ID. It is read at every index in [0, graph.max_nodes())
+ *        for which graph.node_exists() is true
+ * @return a vector of graph.max_nodes() pairs <external ID, value>, where the position i refers to the LLAMA vertex i.
+ *         The positions of the vertices that do not exist (gaps) are never assigned, they keep the value-initialised
+ *         pair <0, T{}>.
+ *         NOTE(review): save_results() iterates over every entry of the vector, so these placeholders are written
+ *         to the output file as well -- confirm this is intended.
+ */
+template <typename T>
+vector<pair<uint64_t, T>> LLAMAClass::translate(ll_mlcsr_ro_graph& graph, const T* __restrict data) {
+    const node_t N = graph.max_nodes(); // this is already a bit of a stretch
+    auto names = graph.get_node_property_64(g_llama_property_names);
+    // `names' is dereferenced by all threads in the loop below, same sanity check of the code this helper replaced
+    assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
+    vector<pair<uint64_t, T>> output(N);
+
+    // each iteration only writes to its own slot of `output', no synchronisation is needed among the threads
+    #pragma omp parallel for
+    for(node_t llama_node_id = 0; llama_node_id < N; llama_node_id++){
+        // first, does this node exist (or is it a gap?)
+        // this is a bit of a stretch: the implementation from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
+        if(!graph.node_exists(llama_node_id)) continue;
+
+        // second, what is its real node ID, in the external domain (e.g. user id)
+        uint64_t external_node_id = names->get(llama_node_id);
+
+        // third, its score
+        T score = data[llama_node_id];
+
+        // finally, register the association
+        output[llama_node_id] = make_pair(external_node_id, score);
+    }
+
+    return output;
+}
+
+/**
+ * Write the given pairs to a text file, one line `<first> <second>' per entry, in the order of the vector.
+ * When the template argument negative_scores is false, a value smaller than zero is replaced in the output
+ * by numeric_limits<T>::max().
+ *
+ * @param result the pairs to store
+ * @param dump2file the path of the file to create, it cannot be a nullptr
+ */
+template <typename T, bool negative_scores>
+void LLAMAClass::save_results(const vector<pair<uint64_t, T>>& result, const char* dump2file) {
+    assert(dump2file != nullptr);
+    COUT_DEBUG("save the results to: " << dump2file);
+
+    fstream output(dump2file, ios_base::out);
+    if (!output.good()) ERROR("Cannot save the result to `" << dump2file << "'");
+
+    for (const auto& entry : result) {
+        const T& value = entry.second;
+        // only mask the negative values when the caller asked for it through the template argument
+        const bool masked = !negative_scores && value < 0;
+
+        output << entry.first << " ";
+        if (masked) {
+            output << numeric_limits<T>::max();
+        } else {
+            output << value;
+        }
+        output << "\n";
+    }
+
+    output.close();
+}
+
+// Explicitly instantiate translate() and both variants (negative_scores = true/false) of save_results() for each value type listed below
+#define INSTANTIATE_TRANSLATE( TYPE ) template vector<pair<uint64_t, TYPE>> LLAMAClass::translate<TYPE>(ll_mlcsr_ro_graph& graph, const TYPE* __restrict data); \
+  template void LLAMAClass::save_results<TYPE, true>(const vector<pair<uint64_t, TYPE>>& result, const char* dump2file); \
+  template void LLAMAClass::save_results<TYPE, false>(const vector<pair<uint64_t, TYPE>>& result, const char* dump2file);
+
+INSTANTIATE_TRANSLATE( int64_t );
+INSTANTIATE_TRANSLATE( uint64_t );
+INSTANTIATE_TRANSLATE( double );
+
 /*****************************************************************************
  *                                                                           *
  *  Dump                                                                     *

diff --git a/library/llama/llama_class.hpp b/library/llama/llama_class.hpp
@@ -133,6 +133,13 @@ class LLAMAClass : public virtual UpdateInterface, public virtual GraphalyticsIn
     // Internal implementation of the CDLP algorithm
     std::unique_ptr<uint64_t[]> cdlp_impl(utility::TimeoutService& timer, ll_mlcsr_ro_graph& graph, uint64_t max_iterations);
 
+    // Helper for Graphalytics: build a vector with one pair <external ID, data[i]> for each LLAMA (logical) vertex ID i in [0, graph.max_nodes())
+    template <typename T>
+    std::vector<std::pair<uint64_t, T>> translate(ll_mlcsr_ro_graph& graph, const T* __restrict data);
+
+    // Helper, save the content of the vector to the given output file, one line `<first> <second>' per pair. If negative_scores is false, values < 0 are stored as numeric_limits<T>::max()
+    template <typename T, bool negative_scores = true>
+    void save_results(const std::vector<std::pair<uint64_t, T>>& result, const char* dump2file);
 public:
     /**
      * Constructor

diff --git a/library/llama/llama_lcc.cpp b/library/llama/llama_lcc.cpp
@@ -434,53 +434,18 @@ void LLAMAClass::lcc(const char* dump2file){
     } else {
         llama_execute_lcc_undirected(timeout, graph, /* output */ scores);
     }
-
-    if(timeout.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
-    }
+    if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
     // translate from llama vertex ids to external vertex ids
-    auto names = graph.get_node_property_64(g_llama_property_names);
-    assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
-    cuckoohash_map</* external id */ uint64_t, /* score */ double> external_ids;
-    #pragma omp parallel for
-    for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
-        // first, does this node exist (or it's a gap?)
-        // this is a bit of a stretch: the impl~ from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
-        if(!graph.node_exists(llama_node_id)) continue;
-
-        // second, what's it's real node ID, in the external domain (e.g. user id)
-        uint64_t external_node_id = names->get(llama_node_id);
-
-        // third, its score
-        double score = scores[llama_node_id];
-
-        // finally, register the association
-        external_ids.insert(external_node_id, score);
-    }
-
-    if(timeout.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
-    }
+    auto external_ids = translate(graph, scores);
+    if(timeout.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
 #if defined(LL_COUNTERS)
     ll_print_counters(stdout);
 #endif
 
-    // store the results in the given file
-    if(dump2file != nullptr){
-        COUT_DEBUG("save the results to: " << dump2file)
-        fstream handle(dump2file, ios_base::out);
-        if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");
-
-        auto hashtable = external_ids.lock_table();
-
-        for(const auto& keyvaluepair : hashtable){
-            handle << keyvaluepair.first << " " << keyvaluepair.second << "\n";
-        }
-
-        handle.close();
-    }
+    if(dump2file != nullptr) // store the results in the given file
+        save_results(external_ids, dump2file);
 }
 
 } // namespace

diff --git a/library/llama/llama_pagerank.cpp b/library/llama/llama_pagerank.cpp
@@ -68,49 +68,14 @@ void LLAMAClass::pagerank(uint64_t num_iterations, double damping_factor, const
     unique_ptr<double[]> ptr_rank { new double[graph.max_nodes()] };
     double* rank = ptr_rank.get();
     pagerank_impl(timeout_srv, graph, current_num_vertices, num_iterations, damping_factor, /* output */ rank);
-
-    if(timeout_srv.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
-    }
+    if(timeout_srv.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
     // translate from llama ids to external vertex ids
-    auto names = graph.get_node_property_64(g_llama_property_names);
-    assert(names != nullptr && "Wrong string ID to refer the property attached to the vertices");
-    cuckoohash_map</* external id */ uint64_t, /* distance */ double> external_ids;
-    #pragma omp parallel for
-    for(node_t llama_node_id = 0; llama_node_id < graph.max_nodes(); llama_node_id++){
-        // first, does this node exist (or it's a gap?)
-        // this is a bit of a stretch: the impl~ from llama assumes that a node does not exist only if it does not have any incoming or outgoing edges.
-        if(!graph.node_exists(llama_node_id)) continue;
-
-        // second, what's it's real node ID, in the external domain (e.g. user id)
-        uint64_t external_node_id = names->get(llama_node_id);
-
-        // third, its distance
-        double distance = rank[llama_node_id];
-
-        // finally, register the association
-        external_ids.insert(external_node_id, distance);
-    }
+    auto external_ids = translate(graph, rank);
+    if(timeout_srv.is_timeout()){ RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer); }
 
-    if(timeout_srv.is_timeout()){
-        RAISE_EXCEPTION(TimeoutError, "Timeout occurred after " << timer);
-    }
-
-    // store the results in the given file
-    if(dump2file != nullptr){
-        COUT_DEBUG("save the results to: " << dump2file)
-        fstream handle(dump2file, ios_base::out);
-        if(!handle.good()) ERROR("Cannot save the result to `" << dump2file << "'");
-
-        auto hashtable = external_ids.lock_table();
-
-        for(const auto& keyvaluepair : hashtable){
-            handle << keyvaluepair.first << " " << keyvaluepair.second << "\n";
-        }
-
-        handle.close();
-    }
+    if(dump2file != nullptr) // store the results in the given file
+        save_results(external_ids, dump2file);
 }
 
 // Implementation derived from llama/benchmark/benchmarks/pagerank.h, class ll_b_pagerank_pull_ext