diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d799a60..52d323dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ endif() # Setup version -set(VERSION_STRING "2.1.1") +set(VERSION_STRING "2.1.2") rocm_setup_version(VERSION ${VERSION_STRING}) set(rocalution_SOVERSION 0.1) diff --git a/clients/include/common.hpp b/clients/include/common.hpp index 4a41ced6..5cdb2507 100644 --- a/clients/include/common.hpp +++ b/clients/include/common.hpp @@ -40,6 +40,8 @@ void distribute_matrix(const MPI_Comm* comm, MPI_Comm_size(*comm, &num_procs); size_t global_nrow = lmat->GetM(); + size_t global_ncol = lmat->GetN(); + size_t global_nnz = lmat->GetNnz(); int* global_row_offset = NULL; int* global_col = NULL; @@ -47,6 +49,21 @@ void distribute_matrix(const MPI_Comm* comm, lmat->LeaveDataPtrCSR(&global_row_offset, &global_col, &global_val); + // If we have only a single MPI rank, we are done + if(num_procs == 1) + { + pm->SetMPICommunicator(comm); + pm->SetGlobalNrow(global_nrow); + pm->SetGlobalNcol(global_ncol); + pm->SetLocalNrow(global_nrow); + pm->SetLocalNcol(global_ncol); + + gmat->SetParallelManager(*pm); + gmat->SetLocalDataPtrCSR(&global_row_offset, &global_col, &global_val, "mat", global_nnz); + + return; + } + // Compute local matrix sizes std::vector local_size(num_procs); diff --git a/clients/include/testing_local_vector.hpp b/clients/include/testing_local_vector.hpp index b9b1bf09..41a7a184 100644 --- a/clients/include/testing_local_vector.hpp +++ b/clients/include/testing_local_vector.hpp @@ -79,12 +79,6 @@ void testing_local_vector_bad_args(void) ".*Assertion.*index != (NULL|__null)*"); } - // GetIndexValues - { - T* null_T = nullptr; - ASSERT_DEATH(vec.GetIndexValues(null_T), ".*Assertion.*values != (NULL|__null)*"); - } - // SetIndexValues { T* null_T = nullptr; @@ -98,13 +92,6 @@ void testing_local_vector_bad_args(void) ".*Assertion.*values != (NULL|__null)*"); } - // SetContinuousValues - { - T* null_T = nullptr; - ASSERT_DEATH(vec.SetContinuousValues(0, 0, null_T), - ".*Assertion.*values != (NULL|__null)*"); - } - // ExtractCoarseMapping { int* null_int = nullptr; diff --git a/clients/samples/cg-amg_mpi.cpp b/clients/samples/cg-amg_mpi.cpp index a9e78abf..7436598e 100644 --- a/clients/samples/cg-amg_mpi.cpp +++ b/clients/samples/cg-amg_mpi.cpp @@ -43,13 +43,6 @@ int main(int argc, char* argv[]) MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &num_procs); - // Check command line parameters - if(num_procs < 2) - { - std::cerr << "Expecting at least 2 MPI processes" << std::endl; - return -1; - } - if(argc < 2) { std::cerr << argv[0] << " " << std::endl; diff --git a/clients/samples/cg_mpi.cpp b/clients/samples/cg_mpi.cpp index 483502f7..d5900ed8 100644 --- a/clients/samples/cg_mpi.cpp +++ b/clients/samples/cg_mpi.cpp @@ -43,13 +43,6 @@ int main(int argc, char* argv[]) MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &num_procs); - // Check command line parameters - if(num_procs < 2) - { - std::cerr << "Expecting at least 2 MPI processes" << std::endl; - return -1; - } - if(argc < 2) { std::cerr << argv[0] << " " << std::endl; diff --git a/src/base/global_matrix.cpp b/src/base/global_matrix.cpp index e9af6132..a4ff5257 100644 --- a/src/base/global_matrix.cpp +++ b/src/base/global_matrix.cpp @@ -53,6 +53,8 @@ namespace rocalution FATAL_ERROR(__FILE__, __LINE__); #endif + this->pm_ = NULL; + this->object_name_ = ""; this->nnz_ = 0; @@ -1518,14 +1520,20 @@ namespace rocalution rGsize, ordering); - // TODO asserts - - LocalMatrix tmp; - tmp.CloneFrom(this->matrix_ghost_); - tmp.ConvertToCSR(); + if(this->pm_ == NULL || this->pm_->num_procs_ == 1) + { + this->matrix_interior_.InitialPairwiseAggregation( + beta, nc, G, Gsize, rG, rGsize, ordering); + } + else + { + LocalMatrix tmp; + tmp.CloneFrom(this->matrix_ghost_); + tmp.ConvertToCSR(); - this->matrix_interior_.InitialPairwiseAggregation( - tmp, beta, nc, G, Gsize, rG, rGsize, ordering); + this->matrix_interior_.InitialPairwiseAggregation( + tmp, beta, nc, G, Gsize, rG, rGsize, ordering); + } } template @@ -1547,14 +1555,20 @@ namespace rocalution rGsize, ordering); - // TODO asserts - - LocalMatrix tmp; - tmp.CloneFrom(this->matrix_ghost_); - tmp.ConvertToCSR(); + if(this->pm_ == NULL || this->pm_->num_procs_ == 1) + { + this->matrix_interior_.FurtherPairwiseAggregation( + beta, nc, G, Gsize, rG, rGsize, ordering); + } + else + { + LocalMatrix tmp; + tmp.CloneFrom(this->matrix_ghost_); + tmp.ConvertToCSR(); - this->matrix_interior_.FurtherPairwiseAggregation( - tmp, beta, nc, G, Gsize, rG, rGsize, ordering); + this->matrix_interior_.FurtherPairwiseAggregation( + tmp, beta, nc, G, Gsize, rG, rGsize, ordering); + } } template @@ -1582,7 +1596,24 @@ namespace rocalution assert(pm != NULL); assert(rG != NULL); - // TODO asserts + if(this->pm_ == NULL || this->pm_->num_procs_ == 1) + { + this->matrix_interior_.CoarsenOperator( + &Ac->matrix_interior_, pm, nrow, ncol, G, Gsize, rG, rGsize); + + pm->Clear(); + pm->SetMPICommunicator(this->pm_->comm_); + + pm->SetGlobalNrow(Ac->matrix_interior_.GetM()); + pm->SetGlobalNcol(Ac->matrix_interior_.GetN()); + + pm->SetLocalNrow(Ac->matrix_interior_.GetM()); + pm->SetLocalNcol(Ac->matrix_interior_.GetN()); + + Ac->SetParallelManager(*pm); + + return; + } #ifdef SUPPORT_MULTINODE // MPI Requests for sync diff --git a/src/base/global_vector.cpp b/src/base/global_vector.cpp index 74c64632..d05a981d 100644 --- a/src/base/global_vector.cpp +++ b/src/base/global_vector.cpp @@ -51,6 +51,8 @@ namespace rocalution FATAL_ERROR(__FILE__, __LINE__); #endif + this->pm_ = NULL; + this->object_name_ = ""; } diff --git a/src/base/hip/hip_vector.cpp b/src/base/hip/hip_vector.cpp index f6d40b13..979d9f2d 100644 --- a/src/base/hip/hip_vector.cpp +++ b/src/base/hip/hip_vector.cpp @@ -180,11 +180,14 @@ namespace rocalution hipMemcpyHostToDevice); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpy(this->index_array_, - cast_vec->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyHostToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpy(this->index_array_, + cast_vec->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyHostToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } else @@ -228,11 +231,14 @@ namespace rocalution hipMemcpyDeviceToHost); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpy(cast_vec->index_array_, - this->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyDeviceToHost); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpy(cast_vec->index_array_, + this->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyDeviceToHost); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } else @@ -277,11 +283,14 @@ namespace rocalution hipMemcpyHostToDevice); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpyAsync(this->index_array_, - cast_vec->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyHostToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpyAsync(this->index_array_, + cast_vec->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyHostToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } else @@ -325,11 +334,14 @@ namespace rocalution hipMemcpyDeviceToHost); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpyAsync(cast_vec->index_array_, - this->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyDeviceToHost); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpyAsync(cast_vec->index_array_, + this->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyDeviceToHost); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } else @@ -378,11 +390,14 @@ namespace rocalution hipMemcpyDeviceToDevice); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpy(this->index_array_, - hip_cast_vec->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyDeviceToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpy(this->index_array_, + hip_cast_vec->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyDeviceToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } } @@ -440,11 +455,14 @@ namespace rocalution hipMemcpyDeviceToDevice); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpy(this->index_array_, - hip_cast_vec->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyDeviceToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpy(this->index_array_, + hip_cast_vec->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyDeviceToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } } @@ -535,11 +553,14 @@ namespace rocalution hipMemcpyDeviceToDevice); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpy(hip_cast_vec->index_array_, - this->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyDeviceToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpy(hip_cast_vec->index_array_, + this->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyDeviceToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } } @@ -597,11 +618,14 @@ namespace rocalution hipMemcpyDeviceToDevice); CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpy(hip_cast_vec->index_array_, - this->index_array_, - this->index_size_ * sizeof(int), - hipMemcpyDeviceToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + if(this->index_size_ > 0) + { + hipMemcpy(hip_cast_vec->index_array_, + this->index_array_, + this->index_size_ * sizeof(int), + hipMemcpyDeviceToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } } } @@ -1383,68 +1407,77 @@ namespace rocalution template void HIPAcceleratorVector::SetIndexArray(int size, const int* index) { - assert(size > 0); + assert(size >= 0); assert(this->size_ >= size); this->index_size_ = size; - allocate_hip(this->index_size_, &this->index_array_); - allocate_hip(this->index_size_, &this->index_buffer_); + if(this->index_size_ > 0) + { + allocate_hip(this->index_size_, &this->index_array_); + allocate_hip(this->index_size_, &this->index_buffer_); - hipMemcpy( - this->index_array_, index, this->index_size_ * sizeof(int), hipMemcpyHostToDevice); + hipMemcpy( + this->index_array_, index, this->index_size_ * sizeof(int), hipMemcpyHostToDevice); + } } template void HIPAcceleratorVector::GetIndexValues(ValueType* values) const { - assert(values != NULL); + if(this->index_size_ > 0) + { + assert(values != NULL); - dim3 BlockSize(this->local_backend_.HIP_block_size); - dim3 GridSize(this->index_size_ / this->local_backend_.HIP_block_size + 1); + dim3 BlockSize(this->local_backend_.HIP_block_size); + dim3 GridSize(this->index_size_ / this->local_backend_.HIP_block_size + 1); - hipLaunchKernelGGL((kernel_get_index_values), - GridSize, - BlockSize, - 0, - 0, - this->index_size_, - this->index_array_, - this->vec_, - this->index_buffer_); - CHECK_HIP_ERROR(__FILE__, __LINE__); + hipLaunchKernelGGL((kernel_get_index_values), + GridSize, + BlockSize, + 0, + 0, + this->index_size_, + this->index_array_, + this->vec_, + this->index_buffer_); + CHECK_HIP_ERROR(__FILE__, __LINE__); - hipMemcpy(values, - this->index_buffer_, - this->index_size_ * sizeof(ValueType), - hipMemcpyDeviceToHost); - CHECK_HIP_ERROR(__FILE__, __LINE__); + hipMemcpy(values, + this->index_buffer_, + this->index_size_ * sizeof(ValueType), + hipMemcpyDeviceToHost); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } template void HIPAcceleratorVector::SetIndexValues(const ValueType* values) { - assert(values != NULL); + if(this->index_size_ > 0) + { + assert(values != NULL); - hipMemcpy(this->index_buffer_, - values, - this->index_size_ * sizeof(ValueType), - hipMemcpyHostToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + hipMemcpy(this->index_buffer_, + values, + this->index_size_ * sizeof(ValueType), + hipMemcpyHostToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); - dim3 BlockSize(this->local_backend_.HIP_block_size); - dim3 GridSize(this->index_size_ / this->local_backend_.HIP_block_size + 1); + dim3 BlockSize(this->local_backend_.HIP_block_size); + dim3 GridSize(this->index_size_ / this->local_backend_.HIP_block_size + 1); - hipLaunchKernelGGL((kernel_set_index_values), - GridSize, - BlockSize, - 0, - 0, - this->index_size_, - this->index_array_, - this->index_buffer_, - this->vec_); - CHECK_HIP_ERROR(__FILE__, __LINE__); + hipLaunchKernelGGL((kernel_set_index_values), + GridSize, + BlockSize, + 0, + 0, + this->index_size_, + this->index_array_, + this->index_buffer_, + this->vec_); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } template @@ -1470,11 +1503,16 @@ namespace rocalution assert(start >= 0); assert(end >= start); assert(end <= this->size_); - assert(values != NULL); - hipMemcpy( - this->vec_ + start, values, (end - start) * sizeof(ValueType), hipMemcpyHostToDevice); - CHECK_HIP_ERROR(__FILE__, __LINE__); + int size = end - start; + + if(size > 0) + { + assert(values != NULL); + + hipMemcpy(this->vec_ + start, values, size * sizeof(ValueType), hipMemcpyHostToDevice); + CHECK_HIP_ERROR(__FILE__, __LINE__); + } } template diff --git a/src/base/host/host_matrix_csr.cpp b/src/base/host/host_matrix_csr.cpp index a41d8aa4..923ecbde 100644 --- a/src/base/host/host_matrix_csr.cpp +++ b/src/base/host/host_matrix_csr.cpp @@ -6024,9 +6024,12 @@ namespace rocalution } } - for(int j = cast_mat->mat_.row_offset[i]; j < cast_mat->mat_.row_offset[i + 1]; ++j) + if(cast_mat->nnz_ > 0) { - sum += std::abs(cast_mat->mat_.val[j]); + for(int j = cast_mat->mat_.row_offset[i]; j < cast_mat->mat_.row_offset[i + 1]; ++j) + { + sum += std::abs(cast_mat->mat_.val[j]); + } } sum *= static_cast(5); @@ -6157,18 +6160,21 @@ namespace rocalution } } - for(int j = cast_mat->mat_.row_offset[i]; j < cast_mat->mat_.row_offset[i + 1]; ++j) + if(cast_mat->nnz_ > 0) { - ValueType val_j = cast_mat->mat_.val[j]; - - if(neg == true) + for(int j = cast_mat->mat_.row_offset[i]; j < cast_mat->mat_.row_offset[i + 1]; ++j) { - val_j *= static_cast(-1); - } + ValueType val_j = cast_mat->mat_.val[j]; - if(val_j > max_a_ij) - { - max_a_ij = val_j; + if(neg == true) + { + val_j *= static_cast(-1); + } + + if(val_j > max_a_ij) + { + max_a_ij = val_j; + } } } @@ -6591,18 +6597,21 @@ namespace rocalution } } - for(int j = cast_mat->mat_.row_offset[i]; j < cast_mat->mat_.row_offset[i + 1]; ++j) + if(cast_mat->nnz_ > 0) { - ValueType val_j = cast_mat->mat_.val[j]; - - if(neg == true) + for(int j = cast_mat->mat_.row_offset[i]; j < cast_mat->mat_.row_offset[i + 1]; ++j) { - val_j *= static_cast(-1); - } + ValueType val_j = cast_mat->mat_.val[j]; - if(val_j > max_a_ij) - { - max_a_ij = val_j; + if(neg == true) + { + val_j *= static_cast(-1); + } + + if(val_j > max_a_ij) + { + max_a_ij = val_j; + } } } diff --git a/src/base/host/host_vector.cpp b/src/base/host/host_vector.cpp index d16f7394..3c1f048e 100644 --- a/src/base/host/host_vector.cpp +++ b/src/base/host/host_vector.cpp @@ -1359,27 +1359,34 @@ namespace rocalution template void HostVector::SetIndexArray(int size, const int* index) { - assert(index != NULL); - assert(size > 0); + assert(size >= 0); this->index_size_ = size; - allocate_host(this->index_size_, &this->index_array_); - - for(int i = 0; i < this->index_size_; ++i) + if(this->index_size_ > 0) { - this->index_array_[i] = index[i]; + assert(index != NULL); + + allocate_host(this->index_size_, &this->index_array_); + + for(int i = 0; i < this->index_size_; ++i) + { + this->index_array_[i] = index[i]; + } } } template void HostVector::GetIndexValues(ValueType* values) const { - assert(values != NULL); - - for(int i = 0; i < this->index_size_; ++i) + if(this->index_size_ > 0) { - values[i] = this->vec_[this->index_array_[i]]; + assert(values != NULL); + + for(int i = 0; i < this->index_size_; ++i) + { + values[i] = this->vec_[this->index_array_[i]]; + } } } @@ -1414,11 +1421,15 @@ namespace rocalution assert(start >= 0); assert(end >= start); assert(end <= this->GetSize()); - assert(values != NULL); - for(int i = start, j = 0; i < end; ++i, ++j) + if(end - start > 0) { - this->vec_[i] = values[j]; + assert(values != NULL); + + for(int i = start, j = 0; i < end; ++i, ++j) + { + this->vec_[i] = values[j]; + } } } diff --git a/src/base/local_vector.cpp b/src/base/local_vector.cpp index c3b290fa..563e1167 100644 --- a/src/base/local_vector.cpp +++ b/src/base/local_vector.cpp @@ -1068,8 +1068,8 @@ namespace rocalution { log_debug(this, "LocalVector::SetIndexArray()", size, index); - assert(size > 0); - assert(index != NULL); + assert(size >= 0); + assert(index != NULL || size == 0); this->vector_->SetIndexArray(size, index); } @@ -1079,8 +1079,6 @@ namespace rocalution { log_debug(this, "LocalVector::GetIndexValues()", values); - assert(values != NULL); - this->vector_->GetIndexValues(values); } @@ -1112,10 +1110,10 @@ namespace rocalution { log_debug(this, "LocalVector::SetContinuousValues()", start, end, values); - assert(values != NULL); assert(start >= 0); assert(end >= start); assert(end <= this->GetSize()); + assert(values != NULL || end - start == 0); this->vector_->SetContinuousValues(start, end, values); }