Skip to content

Commit

Permalink
add bfs and sssp
Browse files Browse the repository at this point in the history
  • Loading branch information
chenxuhao committed Apr 4, 2023
1 parent d3b5340 commit f4f8e3c
Show file tree
Hide file tree
Showing 13 changed files with 681 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,5 @@ modules.order
Module.symvers
Mkfile.old
dkms.conf

bin/*
79 changes: 79 additions & 0 deletions include/bitmap.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) 2015, The Regents of the University of California (Regents)
// See LICENSE.txt for license details

#ifndef BITMAP_H_
#define BITMAP_H_

//#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include "platform_atomics.h"
#include <cstdint>  // standard uint64_t; the old hand-rolled "typedef unsigned long uint64_t"
                    // is wrong on LLP64 targets and collides with the standard typedef
/*
GAP Benchmark Suite
Class: Bitmap
Author: Scott Beamer
Parallel bitmap that is thread-safe
- Can set bits in parallel (set_bit_atomic) unlike std::vector<bool>
*/


class Bitmap {
 public:
  // Allocate storage for `size` bits, rounded up to whole 64-bit words.
  // NOTE: the bits are NOT zero-initialized; call reset() before first use.
  explicit Bitmap(size_t size) {
    uint64_t num_words = (size + kBitsPerWord - 1) / kBitsPerWord;
#ifdef SIM
    start_ = (uint64_t *)aligned_alloc(PAGE_SIZE, num_words*sizeof(uint64_t));
#else
    start_ = new uint64_t[num_words];
#endif
    end_ = start_ + num_words;
  }

  ~Bitmap() {
#ifdef SIM
    // BUG FIX: memory obtained from aligned_alloc() must be released with
    // free(); calling delete[] on it is undefined behavior.
    free(start_);
#else
    delete[] start_;
#endif
  }

  // Copying would double-free start_, so forbid it; use swap() to exchange
  // two bitmaps instead.
  Bitmap(const Bitmap &) = delete;
  Bitmap &operator=(const Bitmap &) = delete;

  // Clear every bit. Not thread-safe; call before handing the bitmap to
  // parallel writers.
  void reset() {
    //std::fill(start_, end_, 0);
    uint64_t *ptr = start_;
    for(; ptr != end_; ++ ptr) *ptr = 0;
  }

  // Set bit `pos` without synchronization (single-writer use only).
  void set_bit(size_t pos) {
    start_[word_offset(pos)] |= ((uint64_t) 1l << bit_offset(pos));
  }

  // Set bit `pos` via a CAS retry loop so concurrent writers to the same
  // word cannot lose each other's updates.
  void set_bit_atomic(size_t pos) {
    uint64_t old_val, new_val;
    do {
      old_val = start_[word_offset(pos)];
      new_val = old_val | ((uint64_t) 1l << bit_offset(pos));
    } while (!compare_and_swap(start_[word_offset(pos)], old_val, new_val));
  }

  bool get_bit(size_t pos) const {
    return (start_[word_offset(pos)] >> bit_offset(pos)) & 1l;
  }

  // Exchange storage with another bitmap (used for double buffering).
  void swap(Bitmap &other) {
    //std::swap(start_, other.start_);
    //std::swap(end_, other.end_);
    uint64_t *temp = start_;
    start_ = other.start_;
    other.start_ = temp;
    temp = end_;
    end_ = other.end_;
    other.end_ = temp;
  }

  uint64_t *start_;  // first word of the bit array
  uint64_t *end_;    // one past the last word

 private:
  static const uint64_t kBitsPerWord = 64;
  static uint64_t word_offset(size_t n) { return n / kBitsPerWord; }
  static uint64_t bit_offset(size_t n) { return n & (kBitsPerWord - 1); }
};

#endif // BITMAP_H_
4 changes: 2 additions & 2 deletions include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ typedef uint8_t patid_t; // pattern id type
typedef uint8_t mask_t; // mask type
typedef uint8_t label_t; // label type
typedef uint8_t vlabel_t; // vertex label type
//typedef int32_t elabel_t; // edge label type
typedef int32_t elabel_t; // edge label type
//typedef uint16_t elabel_t; // edge label type
typedef float elabel_t; // edge label type
//typedef float elabel_t; // edge label type
typedef uint8_t cmap_vt; // cmap value type
typedef uint32_t vidType; // vertex ID type
typedef int64_t eidType; // edge ID type
Expand Down
122 changes: 122 additions & 0 deletions include/sliding_queue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Copyright (c) 2015, The Regents of the University of California (Regents)
// See LICENSE.txt for license details

#ifndef SLIDING_QUEUE_H_
#define SLIDING_QUEUE_H_

#include <iostream>
#include "platform_atomics.h"
#ifdef SIM
#include "sim.h"
#endif

/*
GAP Benchmark Suite
Class: SlidingQueue
Author: Scott Beamer
Double-buffered queue so appends aren't seen until SlideWindow() called
- Use QueueBuffer when used in parallel to avoid false sharing by doing
bulk appends from thread-local storage
*/


// Forward declaration: QueueBuffer is granted friend access below.
template <typename T>
class QueueBuffer;

// Double-buffered work queue: elements appended with push_back() (or a
// QueueBuffer flush) remain invisible to consumers until slide_window()
// publishes them as the new [begin, end) window.
template <typename T>
class SlidingQueue {
  T *shared;               // backing storage for all pushed elements
  size_t shared_in;        // next free slot (append cursor)
  size_t shared_out_start; // first element of the published window
  size_t shared_out_end;   // one past the last published element
  friend class QueueBuffer<T>;

 public:
  explicit SlidingQueue(size_t shared_size) {
    shared = new T[shared_size];
#ifdef SIM
    T *lo = shared;
    T *hi = &shared[shared_size];
    set_addr_bounds(0,(uint64_t)lo,(uint64_t)hi,8);
    std::cout << "Worklist address from" << lo << "to" << hi << std::endl;
#endif
    reset();
  }

  ~SlidingQueue() {
    delete[] shared;
  }

  // Append one element; it stays hidden until the next slide_window().
  void push_back(T to_add) {
    shared[shared_in++] = to_add;
  }

  // True when the published window contains no elements.
  bool empty() const {
    return shared_out_start == shared_out_end;
  }

  // Discard everything: window and append cursor both return to zero.
  void reset() {
    shared_in = 0;
    shared_out_start = 0;
    shared_out_end = 0;
  }

  // Publish all pending appends as the new window.
  void slide_window() {
    shared_out_start = shared_out_end;
    shared_out_end = shared_in;
  }

  typedef T* iterator;

  iterator begin() const { return shared + shared_out_start; }

  iterator end() const { return shared + shared_out_end; }

  size_t size() const { return shared_out_end - shared_out_start; }
};


// Thread-local staging buffer for a SlidingQueue: elements accumulate in a
// private array and are appended to the shared queue in one bulk reservation,
// avoiding false sharing and per-element atomics.
template <typename T>
class QueueBuffer {
  size_t in;               // number of valid entries staged in local_queue
  T *local_queue;          // thread-private staging array
  SlidingQueue<T> &sq;     // destination shared queue
  const size_t local_size; // capacity of local_queue

 public:
  explicit QueueBuffer(SlidingQueue<T> &master, size_t given_size = 16384)
      : sq(master), local_size(given_size) {
    in = 0;
    local_queue = new T[local_size];
  }

  ~QueueBuffer() {
    // NOTE(review): entries not yet flushed are silently dropped here;
    // callers must invoke flush() before the buffer goes out of scope.
    delete[] local_queue;
  }

  // BUG FIX: copying shared the owning local_queue pointer and would
  // double-delete it (and the reference member makes assignment
  // meaningless), so both are forbidden.
  QueueBuffer(const QueueBuffer &) = delete;
  QueueBuffer &operator=(const QueueBuffer &) = delete;

  // Stage one element locally, spilling to the shared queue when full.
  void push_back(T to_add) {
    if (in == local_size)
      flush();
    local_queue[in++] = to_add;
  }

  // Atomically reserve a slot range in the shared queue and copy the staged
  // elements into it, then reset the local buffer.
  void flush() {
    T *shared_queue = sq.shared;
    size_t copy_start = fetch_and_add(sq.shared_in, in);
    //std::copy(local_queue, local_queue+in, shared_queue+copy_start);
    T *first = local_queue;
    T *last = local_queue + in;
    T *d_first = shared_queue + copy_start;
    while (first != last) *d_first++ = *first++;
    in = 0;
  }
};

#endif // SLIDING_QUEUE_H_
18 changes: 18 additions & 0 deletions src/bfs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Build rules for the BFS variants; shared flags and objects come from common.mk.
include ../common.mk
OBJS += verifier.o main.o

# `all` and `clean` produce no files of those names; mark them phony so a
# stray file cannot mask them.
.PHONY: all clean
all: bfs_omp_base

bfs_omp_base: omp_base.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(INCLUDES) $(OBJS) omp_base.o -o $@ -lgomp
	mv $@ $(BIN)

bfs_omp_topo: omp_topo.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(INCLUDES) $(OBJS) omp_topo.o -o $@ -lgomp
	mv $@ $(BIN)

bfs_omp_direction: omp_direction.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(INCLUDES) $(OBJS) omp_direction.o -o $@ -lgomp
	mv $@ $(BIN)

clean:
	rm -f *.o
29 changes: 29 additions & 0 deletions src/bfs/main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2020 MIT
// Authors: Xuhao Chen <[email protected]>
#include "graph.h"

void BFSSolver(Graph &g, vidType source, vidType *dist);
void BFSVerifier(Graph &g, vidType source, vidType *depth_to_test);

// Driver: parse <graph> [source_id] [reverse], load the graph, run BFS from
// `source`, and verify the computed depths.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    std::cout << "Usage: " << argv[0] << " <graph>"
              << " [source_id(0)] [reverse(0)]\n";
    std::cout << "Example: " << argv[0] << " ../inputs/mico/graph\n";
    exit(1);
  }
  vidType source = 0;
  if (argc > 2) source = atoi(argv[2]);
  std::cout << "Source vertex: " << source << "\n";
  int reverse = 0;
  if (argc > 3) reverse = atoi(argv[3]);
  // BUG FIX: this message was printed unconditionally, even when the
  // reverse graph was not requested.
  if (reverse) std::cout << "Using reverse graph\n";
  std::cout << "Breadth-first Search\n";
  Graph g(argv[1], 0, 1, 0, 0, reverse);
  g.print_meta_data();
  // vidType is unsigned, so `source >= 0` mainly documents intent; a
  // negative atoi() result wraps and is rejected by the upper bound.
  assert(source >= 0 && source < g.V());
  std::vector<vidType> distances(g.V(), MYINFINITY);
  BFSSolver(g, source, &distances[0]);
  BFSVerifier(g, source, &distances[0]);
  return 0;
}
59 changes: 59 additions & 0 deletions src/bfs/omp_base.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2020 MIT
// Authors: Xuhao Chen <[email protected]>
#include "graph.h"
#include "bitmap.h"
#include "sliding_queue.h"
#include "platform_atomics.h"

// Expand the frontier by one BFS level: each thread scans its share of the
// current window, claims unvisited neighbors with a CAS on their depth, and
// stages newly discovered vertices in a thread-local buffer to avoid
// contention on the shared queue.
void bfs_step(Graph &g, vidType *depth, SlidingQueue<vidType> &queue) {
  #pragma omp parallel
  {
    QueueBuffer<vidType> local_frontier(queue);
    #pragma omp for
    for (auto it = queue.begin(); it < queue.end(); it++) {
      auto u = *it;
      for (auto v : g.N(u)) {
        auto observed = depth[v];
        if (observed != MYINFINITY) continue; // already claimed
        // the CAS guarantees exactly one thread wins the right to enqueue v
        if (compare_and_swap(depth[v], observed, depth[u] + 1))
          local_frontier.push_back(v);
      }
    }
    local_frontier.flush();
  }
}

// Level-synchronous parallel BFS: depths start at MYINFINITY, the source is
// seeded at depth 0, and bfs_step() advances one level per published window
// until the frontier drains. Results are copied into `dist`.
void BFSSolver(Graph &g, vidType source, vidType* dist) {
  int nthreads = 1;
  #pragma omp parallel
  {
    nthreads = omp_get_num_threads();
  }
  std::cout << "OpenMP BFS (" << nthreads << " threads)\n";
  VertexList depth(g.V(), MYINFINITY);
  depth[source] = 0;
  int level = 0;
  Timer timer;
  timer.Start();
  // sized by edge count so the total of all per-level appends always fits
  SlidingQueue<vidType> frontier(g.E());
  frontier.push_back(source);
  frontier.slide_window();
  for (; !frontier.empty(); frontier.slide_window()) {
    level++;
    std::cout << "iteration=" << level << ", frontier_size=" << frontier.size() << "\n";
    bfs_step(g, depth.data(), frontier);
  }
  timer.Stop();
  std::cout << "iterations = " << level << "\n";
  std::cout << "runtime [omp_base] = " << timer.Seconds() << " sec\n";
  #pragma omp parallel for
  for (vidType v = 0; v < g.V(); v ++)
    dist[v] = depth[v];
}

// Stub: SSSP entry point the shared driver links against; this translation
// unit only implements BFS, so the body is intentionally empty (`dist` and
// `delta` are ignored — presumably the real implementation lives in src/sssp).
void SSSPSolver(Graph &g, vidType source, elabel_t *dist, int delta) {}
Loading

0 comments on commit f4f8e3c

Please sign in to comment.