Commit

refactor: split structural hole functions into separate files for better modularity

jessicawwen committed Dec 23, 2024
1 parent 1fe6124 commit ecf5661
Showing 13 changed files with 1,400 additions and 1,134 deletions.
266 changes: 154 additions & 112 deletions cpp_easygraph/functions/structural_holes/evaluation.cpp

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions gpu_easygraph/functions/structural_holes/constraint.cpp
@@ -0,0 +1,31 @@
#include <string>
#include <vector>
#include <memory> // <memory> header is required here

#include "structural_holes/constraint.cuh"
#include "common.h"

namespace gpu_easygraph {

using std::vector;

int constraint(
_IN_ const vector<int>& V,
_IN_ const vector<int>& E,
_IN_ const vector<int>& row,
_IN_ const vector<int>& col,
_IN_ int num_nodes,
_IN_ const vector<double>& W,
_IN_ bool is_directed,
_IN_ vector<int>& node_mask, // node mask parameter: nodes with a zero entry are skipped
_OUT_ vector<double>& constraint
) {
int num_edges = row.size();

constraint = vector<double>(num_nodes);
int r = cuda_constraint(V.data(), E.data(), row.data(), col.data(), W.data(), num_nodes, num_edges, is_directed, node_mask.data(), constraint.data());

return r; // EG_GPU_SUCC on success, error code otherwise
}

} // namespace gpu_easygraph
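
For orientation, the sketch below shows how this wrapper might be called from host code on a unit-weight, 3-node triangle. The graph arrays, the main() scaffolding, and the assumption that the gpu_easygraph::constraint declaration and EG_GPU_SUCC from common.h are visible to the caller are illustrative and not part of this commit.

#include <vector>
#include <iostream>

int main() {
    using std::vector;
    // Unit-weight triangle 0-1-2 in the CSR (V, E, W) plus COO (row, col)
    // layout expected by gpu_easygraph::constraint().
    vector<int>    V    = {0, 2, 4, 6};              // CSR offsets, num_nodes + 1 entries
    vector<int>    E    = {1, 2, 0, 2, 0, 1};        // neighbor lists
    vector<int>    row  = {0, 0, 1, 1, 2, 2};        // COO sources
    vector<int>    col  = {1, 2, 0, 2, 0, 1};        // COO targets
    vector<double> W    = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
    vector<int>    mask = {1, 1, 1};                 // evaluate every node
    vector<double> c;

    int status = gpu_easygraph::constraint(V, E, row, col, /*num_nodes=*/3,
                                           W, /*is_directed=*/false, mask, c);
    if (status == EG_GPU_SUCC) {                     // EG_GPU_SUCC comes from common.h
        for (double x : c) std::cout << x << '\n';
    }
    return status;
}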
354 changes: 354 additions & 0 deletions gpu_easygraph/functions/structural_holes/constraint.cu
@@ -0,0 +1,354 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdlib.h>

#include "common.h"
#define NODES_PER_BLOCK 1

namespace gpu_easygraph {

enum norm_t { SUM = 0, MAX = 1 };

static __device__ double mutual_weight(
const int* V,
const int* E,
const double* W,
int u,
int v
) {
double a_uv = 0.0;
for (int i = V[u]; i < V[u+1]; i++) {
if (E[i] == v) {
a_uv = W[i];
break;
}
}
return a_uv;
}

static __device__ double normalized_mutual_weight(
const int* V,
const int* E,
const double* W,
int u,
int v,
norm_t norm
) {
double weight_uv = mutual_weight(V, E, W, u, v);

double scale = 0.0;
if (norm == SUM) {
for (int i = V[u]; i < V[u+1]; i++) {
int neighbor = E[i];
double weight_uw = mutual_weight(V, E, W, u, neighbor);
scale += weight_uw;
}
} else if (norm == MAX) {
for (int i = V[u]; i < V[u+1]; i++) {
int neighbor = E[i];
double weight_uw = mutual_weight(V, E, W, u, neighbor);
scale = fmax(scale,weight_uw);
}
}
return (scale==0.0) ? 0.0 : (weight_uv / scale);
}

static __device__ double local_constraint(
const int* V,
const int* E,
const double* W,
int u,
int v
) {
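// Burt's local constraint of v on u:
//   l(u, v) = (p_uv + sum_w p_uw * p_wv)^2,
// where p_xy is the SUM-normalized mutual weight computed above.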
double direct = normalized_mutual_weight(V,E,W,u,v,SUM);
double indirect = 0.0;
for (int i = V[u]; i < V[u+1]; i++) {
int neighbor = E[i];
double norm_uw = normalized_mutual_weight(V, E, W, u, neighbor,SUM);
double norm_wv = normalized_mutual_weight(V, E, W, neighbor, v,SUM);
indirect += norm_uw * norm_wv;
}
double local_constraint_of_uv = (direct + indirect) * (direct + indirect);
return local_constraint_of_uv;
}

__global__ void calculate_constraints(
const int* __restrict__ V,
const int* __restrict__ E,
const double* __restrict__ W,
const int num_nodes,
const int* __restrict__ node_mask,
double* __restrict__ constraint_results
) {
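// Each block computes c(v) = sum over v's neighbors w of local_constraint(v, w)
// for its assigned node(s), using a block-wide shared-memory reduction.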
int start_node = blockIdx.x * NODES_PER_BLOCK; // first node handled by this block
int end_node = min(start_node + NODES_PER_BLOCK, num_nodes); // exclusive end, clamped to num_nodes

for (int v = start_node; v < end_node; ++v) {
if (!node_mask[v]) continue;

double constraint_of_v = 0.0;
bool is_nan = true;

// all threads in the block process v's neighbors in parallel
__shared__ double shared_constraint[256]; // assumes blockDim.x <= 256
double local_sum = 0.0;

for (int i = V[v] + threadIdx.x; i < V[v + 1]; i += blockDim.x) {
is_nan = false;
int neighbor = E[i];
local_sum += local_constraint(V, E, W, v, neighbor);
}

// reduce the per-thread partial sums to a single value
shared_constraint[threadIdx.x] = local_sum;
__syncthreads();

for (int offset = blockDim.x / 2; offset > 0; offset /= 2) {
if (threadIdx.x < offset) {
shared_constraint[threadIdx.x] += shared_constraint[threadIdx.x + offset];
}
__syncthreads();
}

// store the final result
if (threadIdx.x == 0) {
constraint_results[v] = (is_nan) ? NAN : shared_constraint[0];
}
}
}

static __device__ double directed_mutual_weight(
const int* V,
const int* E,
const double* W,
int u,
int v
) {
double a_uv = 0.0, a_vu = 0.0;
for (int i = V[u]; i < V[u+1]; i++) {
if (E[i] == v) {
a_uv = W[i];
break;
}
}
for (int i = V[v]; i < V[v+1]; i++) {
if (E[i] == u) {
a_vu = W[i];
break;
}
}
return a_uv + a_vu;
}

static __device__ double directed_normalized_mutual_weight(
const int* V,
const int* E,
const int* row,
const int* col,
const double* W,
int num_edges,
int u,
int v,
norm_t norm
) {
double weight_uv = directed_mutual_weight(V, E, W, u, v);

double scale = 0.0;
if(norm==SUM){
for (int i = V[u]; i < V[u+1]; i++) {
int neighbor = E[i];
double weight_uw = directed_mutual_weight(V, E, W, u, neighbor);
scale += weight_uw;
}

for (int i = 0; i < num_edges; i++) {
if (col[i] == u) {
int neighbor = row[i];
double weight_wu = directed_mutual_weight(V, E, W, u, neighbor);
scale += weight_wu;
}
}
}else if(norm==MAX){
for (int i = V[u]; i < V[u+1]; i++) {
int neighbor = E[i];
double weight_uw = directed_mutual_weight(V, E, W, u, neighbor);
scale = fmax(scale,weight_uw);
}

for (int i = 0; i < num_edges; i++) {
if (col[i] == u) {
int neighbor = row[i];
double weight_wu = directed_mutual_weight(V, E, W, u, neighbor);
scale = fmax(scale,weight_wu);
}
}
}
return (scale==0.0) ? 0.0 : (weight_uv / scale);
}

static __device__ double directed_local_constraint(
const int* V,
const int* E,
const int* row,
const int* col,
const double* W,
int num_edges,
int u,
int v
) {
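// Directed variant: mutual weights are symmetrized as a_uv + a_vu, and both
// successors (CSR scan) and predecessors (COO scan over col) of u contribute
// to the indirect term.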
double direct = directed_normalized_mutual_weight(V,E,row,col,W,num_edges,u,v,SUM);
double indirect = 0.0;
for (int i = V[u]; i < V[u+1]; i++) {
int neighbor = E[i];
double norm_uw = directed_normalized_mutual_weight(V, E, row, col, W, num_edges, u, neighbor,SUM);
double norm_wv = directed_normalized_mutual_weight(V, E, row, col, W, num_edges, neighbor, v,SUM);
indirect += norm_uw * norm_wv;
}

for (int i = 0; i < num_edges; i++) {
if (col[i] == u) {
int neighbor = row[i];
double norm_uw = directed_normalized_mutual_weight(V, E, row, col, W, num_edges, u, neighbor,SUM);
double norm_wv = directed_normalized_mutual_weight(V, E, row, col, W, num_edges, neighbor, v,SUM);
indirect += norm_uw * norm_wv;
}
}
double local_constraint_of_uv = (direct + indirect) * (direct + indirect);
return local_constraint_of_uv;
}

__global__ void directed_calculate_constraints(
const int* V,
const int* E,
const int* row,
const int* col,
const double* W,
int num_nodes,
int num_edges,
int* node_mask,
double* constraint_results
) {
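// Same per-block reduction as the undirected kernel, but contributions come
// from both out-neighbors (CSR) and in-neighbors (COO scan over col).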
int start_node = blockIdx.x * NODES_PER_BLOCK; // first node handled by this block
int end_node = min(start_node + NODES_PER_BLOCK, num_nodes); // exclusive end, clamped to num_nodes

for (int v = start_node; v < end_node; ++v) {
if (!node_mask[v]) continue;

double constraint_of_v = 0.0;
bool is_nan = true;

// all threads in the block process v's neighbors in parallel
__shared__ double shared_constraint[256]; // assumes blockDim.x <= 256
double local_sum = 0.0;

// accumulate constraint contributions from out-neighbors
for (int i = V[v] + threadIdx.x; i < V[v + 1]; i += blockDim.x) {
is_nan = false;
int neighbor = E[i];
local_sum += directed_local_constraint(V, E, row, col, W, num_edges, v, neighbor);
}

// accumulate constraint contributions from in-neighbors
for (int i = threadIdx.x; i < num_edges; i += blockDim.x) {
if (col[i] == v) {
// is_nan = false;
int neighbor = row[i];
local_sum += directed_local_constraint(V, E, row, col, W, num_edges, v, neighbor);
}
}

// write each thread's partial sum to shared memory
shared_constraint[threadIdx.x] = local_sum;
__syncthreads();

// reduce the partial sums to a single value
for (int offset = blockDim.x / 2; offset > 0; offset /= 2) {
if (threadIdx.x < offset) {
shared_constraint[threadIdx.x] += shared_constraint[threadIdx.x + offset];
}
__syncthreads();
}

// store the final result
if (threadIdx.x == 0) {
constraint_results[v] = (is_nan) ? NAN : shared_constraint[0];
}
}
}


int cuda_constraint(
_IN_ const int* V,
_IN_ const int* E,
_IN_ const int* row,
_IN_ const int* col,
_IN_ const double* W,
_IN_ int num_nodes,
_IN_ int num_edges,
_IN_ bool is_directed,
_IN_ int* node_mask,
_OUT_ double* constraint_results
) {
int cuda_ret = cudaSuccess;
int EG_ret = EG_GPU_SUCC;

int* d_V;
int* d_E;
int* d_row;
int* d_col;
double* d_W;
int* d_node_mask;
double* d_constraint_results;
int block_size = 256; // threads per block; tunable, but must not exceed the 256-entry shared_constraint buffer
int grid_size = (num_nodes + NODES_PER_BLOCK - 1) / NODES_PER_BLOCK; // each block handles NODES_PER_BLOCK nodes

// allocate device memory
EXIT_IF_CUDA_FAILED(cudaMalloc((void**)&d_V, (num_nodes+1) * sizeof(int)));
EXIT_IF_CUDA_FAILED(cudaMalloc((void**)&d_E, num_edges * sizeof(int)));
EXIT_IF_CUDA_FAILED(cudaMalloc((void**)&d_row, num_edges * sizeof(int)));
EXIT_IF_CUDA_FAILED(cudaMalloc((void**)&d_col, num_edges * sizeof(int)));
EXIT_IF_CUDA_FAILED(cudaMalloc((void**)&d_W, num_edges * sizeof(double)));
EXIT_IF_CUDA_FAILED(cudaMalloc((void**)&d_node_mask, num_nodes * sizeof(int)));
EXIT_IF_CUDA_FAILED(cudaMalloc((void**)&d_constraint_results, num_nodes * sizeof(double)));

// copy input data from host to device
EXIT_IF_CUDA_FAILED(cudaMemcpy(d_V, V, (num_nodes+1) * sizeof(int), cudaMemcpyHostToDevice));
EXIT_IF_CUDA_FAILED(cudaMemcpy(d_E, E, num_edges * sizeof(int), cudaMemcpyHostToDevice));
EXIT_IF_CUDA_FAILED(cudaMemcpy(d_row, row, num_edges * sizeof(int), cudaMemcpyHostToDevice));
EXIT_IF_CUDA_FAILED(cudaMemcpy(d_col, col, num_edges * sizeof(int), cudaMemcpyHostToDevice));
EXIT_IF_CUDA_FAILED(cudaMemcpy(d_node_mask, node_mask, num_nodes * sizeof(int), cudaMemcpyHostToDevice));
EXIT_IF_CUDA_FAILED(cudaMemcpy(d_W, W, num_edges * sizeof(double), cudaMemcpyHostToDevice));


// launch the appropriate CUDA kernel
if (is_directed) {
directed_calculate_constraints<<<grid_size, block_size>>>(d_V, d_E, d_row, d_col, d_W, num_nodes, num_edges, d_node_mask, d_constraint_results);
} else {
calculate_constraints<<<grid_size, block_size>>>(d_V, d_E, d_W, num_nodes, d_node_mask, d_constraint_results);
}

EXIT_IF_CUDA_FAILED(cudaMemcpy(constraint_results, d_constraint_results, num_nodes * sizeof(double), cudaMemcpyDeviceToHost));
exit:

cudaFree(d_V);
cudaFree(d_E);
cudaFree(d_row);
cudaFree(d_col);
cudaFree(d_W);
cudaFree(d_node_mask);
cudaFree(d_constraint_results);
if (cuda_ret != cudaSuccess) {
switch (cuda_ret) {
case cudaErrorMemoryAllocation:
EG_ret = EG_GPU_FAILED_TO_ALLOCATE_DEVICE_MEM;
break;
default:
EG_ret = EG_GPU_DEVICE_ERR;
break;
}
}

return EG_ret;
}

} // namespace gpu_easygraph
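
For cross-checking the undirected kernel on small graphs, a minimal CPU reference that mirrors the mutual_weight, normalized_mutual_weight, and local_constraint helpers above might look like the sketch below; the helpers nmw and constraint_cpu are illustrative names, not part of this commit, and parallel edges are assumed absent.

#include <cmath>
#include <vector>

// SUM-normalized mutual weight p_uv over u's CSR neighborhood.
static double nmw(const std::vector<int>& V, const std::vector<int>& E,
                  const std::vector<double>& W, int u, int v) {
    double w_uv = 0.0, scale = 0.0;
    for (int i = V[u]; i < V[u + 1]; i++) {
        if (E[i] == v) w_uv = W[i];
        scale += W[i];
    }
    return scale == 0.0 ? 0.0 : w_uv / scale;
}

// Burt's constraint c(v) = sum over neighbors w of (p_vw + sum_u p_vu * p_uw)^2.
static double constraint_cpu(const std::vector<int>& V, const std::vector<int>& E,
                             const std::vector<double>& W, int v) {
    if (V[v] == V[v + 1]) return NAN;  // isolated node: constraint is undefined
    double c = 0.0;
    for (int i = V[v]; i < V[v + 1]; i++) {
        int w = E[i];
        double direct = nmw(V, E, W, v, w), indirect = 0.0;
        for (int j = V[v]; j < V[v + 1]; j++)
            indirect += nmw(V, E, W, v, E[j]) * nmw(V, E, W, E[j], w);
        c += (direct + indirect) * (direct + indirect);
    }
    return c;
}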